mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-18 22:08:53 +08:00
Refactor creation of buffer surface state 1/n
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
53d77bb801
commit
9d56939980
@@ -702,6 +702,7 @@ include_directories(${NEO_BUILD_DIR})
|
||||
include_directories(${NEO_SOURCE_DIR})
|
||||
include_directories(${NEO_SHARED_DIRECTORY}/aub_mem_dump/definitions${BRANCH_DIR_SUFFIX})
|
||||
include_directories(${NEO_SHARED_DIRECTORY}/built_ins/builtinops${BRANCH_DIR_SUFFIX})
|
||||
include_directories(${NEO_SHARED_DIRECTORY}/command_container/definitions${BRANCH_DIR_SUFFIX})
|
||||
include_directories(${NEO_SHARED_DIRECTORY}/command_stream/definitions${BRANCH_DIR_SUFFIX})
|
||||
include_directories(${NEO_SHARED_DIRECTORY}/compiler_interface/compiler_options${BRANCH_DIR_SUFFIX})
|
||||
include_directories(${NEO_SHARED_DIRECTORY}/debug_settings/definitions${BRANCH_DIR_SUFFIX})
|
||||
|
||||
@@ -169,13 +169,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
if (neoDevice->getDebugger()) {
|
||||
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
|
||||
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
|
||||
auto debugSurface = device->getDebugSurface();
|
||||
auto mocs = device->getMOCS(false, false);
|
||||
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, debugSurface->getGpuAddress(),
|
||||
debugSurface->getUnderlyingBufferSize(), mocs,
|
||||
false, false, false, neoDevice->getNumGenericSubDevices(),
|
||||
debugSurface, neoDevice->getGmmHelper(), kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = &surfaceState;
|
||||
args.graphicsAddress = device->getDebugSurface()->getGpuAddress();
|
||||
args.size = device->getDebugSurface()->getUnderlyingBufferSize();
|
||||
args.mocs = device->getMOCS(false, false);
|
||||
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
|
||||
args.allocation = device->getDebugSurface();
|
||||
args.gmmHelper = neoDevice->getGmmHelper();
|
||||
args.useGlobalAtomics = kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics;
|
||||
args.areMultipleSubDevicesInContext = true;
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||
}
|
||||
|
||||
|
||||
@@ -284,14 +284,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
if (neoDevice->getDebugger()) {
|
||||
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
|
||||
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
|
||||
auto debugSurface = device->getDebugSurface();
|
||||
auto mocs = device->getMOCS(false, false);
|
||||
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, debugSurface->getGpuAddress(),
|
||||
debugSurface->getUnderlyingBufferSize(), mocs,
|
||||
false, false, false, neoDevice->getNumGenericSubDevices(),
|
||||
debugSurface, neoDevice->getGmmHelper(),
|
||||
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, 1u);
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = &surfaceState;
|
||||
args.graphicsAddress = device->getDebugSurface()->getGpuAddress();
|
||||
args.size = device->getDebugSurface()->getUnderlyingBufferSize();
|
||||
args.mocs = device->getMOCS(false, false);
|
||||
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
|
||||
args.allocation = device->getDebugSurface();
|
||||
args.gmmHelper = neoDevice->getGmmHelper();
|
||||
args.useGlobalAtomics = kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics;
|
||||
args.areMultipleSubDevicesInContext = true;
|
||||
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||
}
|
||||
// Attach Function residency to our CommandList residency
|
||||
|
||||
@@ -66,13 +66,20 @@ struct KernelHw : public KernelImp {
|
||||
if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
|
||||
l3Enabled = false;
|
||||
}
|
||||
auto mocs = this->module->getDevice()->getMOCS(l3Enabled, false);
|
||||
|
||||
NEO::Device *neoDevice = module->getDevice()->getNEODevice();
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, bufferAddressForSsh, bufferSizeForSsh, mocs,
|
||||
false, false, false, neoDevice->getNumGenericSubDevices(),
|
||||
alloc, neoDevice->getGmmHelper(),
|
||||
kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = &surfaceState;
|
||||
args.graphicsAddress = bufferAddressForSsh;
|
||||
args.size = bufferSizeForSsh;
|
||||
args.mocs = this->module->getDevice()->getMOCS(l3Enabled, false);
|
||||
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
|
||||
args.allocation = alloc;
|
||||
args.gmmHelper = neoDevice->getGmmHelper();
|
||||
args.useGlobalAtomics = kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics;
|
||||
args.areMultipleSubDevicesInContext = true;
|
||||
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
|
||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress) = surfaceState;
|
||||
}
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ set(RUNTIME_SRCS_MEM_OBJ
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}buffer_ext.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}buffer_ext.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}image_ext.inl
|
||||
)
|
||||
|
||||
|
||||
@@ -19,10 +19,11 @@
|
||||
#include <functional>
|
||||
|
||||
namespace NEO {
|
||||
class Device;
|
||||
class Buffer;
|
||||
class ClDevice;
|
||||
class Device;
|
||||
class MemoryManager;
|
||||
struct EncodeSurfaceStateArgs;
|
||||
|
||||
using BufferCreatFunc = Buffer *(*)(Context *context,
|
||||
MemoryProperties memoryProperties,
|
||||
@@ -193,6 +194,8 @@ class Buffer : public MemObj {
|
||||
static bool isReadOnlyMemoryPermittedByFlags(const MemoryProperties &properties);
|
||||
|
||||
void transferData(void *dst, void *src, size_t copySize, size_t copyOffset);
|
||||
|
||||
void appendSurfaceStateArgs(EncodeSurfaceStateArgs &args);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -214,7 +217,6 @@ class BufferHw : public Buffer {
|
||||
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
|
||||
bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override;
|
||||
void appendSurfaceStateExt(void *memory);
|
||||
|
||||
static Buffer *create(Context *context,
|
||||
MemoryProperties memoryProperties,
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#include "opencl/source/helpers/surface_formats.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
#include "buffer_ext.inl"
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
@@ -39,11 +38,21 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
|
||||
const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
|
||||
EncodeSurfaceState<GfxFamily>::encodeBuffer(memory, getBufferAddress(rootDeviceIndex),
|
||||
getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex),
|
||||
getMocsValue(disableL3, isReadOnly, rootDeviceIndex),
|
||||
true, forceNonAuxMode, isReadOnly, device.getNumGenericSubDevices(),
|
||||
graphicsAllocation, device.getGmmHelper(), useGlobalAtomics, areMultipleSubDevicesInContext);
|
||||
appendSurfaceStateExt(memory);
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = memory;
|
||||
args.graphicsAddress = getBufferAddress(rootDeviceIndex);
|
||||
args.size = getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex);
|
||||
args.mocs = getMocsValue(disableL3, isReadOnly, rootDeviceIndex);
|
||||
args.cpuCoherent = true;
|
||||
args.forceNonAuxMode = forceNonAuxMode;
|
||||
args.isReadOnly = isReadOnly;
|
||||
args.numAvailableDevices = device.getNumGenericSubDevices();
|
||||
args.allocation = graphicsAllocation;
|
||||
args.gmmHelper = device.getGmmHelper();
|
||||
args.useGlobalAtomics = useGlobalAtomics;
|
||||
args.areMultipleSubDevicesInContext = areMultipleSubDevicesInContext;
|
||||
appendSurfaceStateArgs(args);
|
||||
EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
17
opencl/source/mem_obj/definitions/buffer_ext.cpp
Normal file
17
opencl/source/mem_obj/definitions/buffer_ext.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
#include "encode_surface_state_args.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Customization point for the arguments used to encode this buffer's surface state.
// BufferHw::setArgStateful calls it after filling `args` and right before
// EncodeSurfaceState::encodeBuffer(args). This definition has an empty body,
// so `args` is passed on exactly as the caller populated it.
// NOTE(review): the file is listed under definitions${BRANCH_DIR_SUFFIX}, so other
// branches presumably provide a different definition - confirm.
void Buffer::appendSurfaceStateArgs(EncodeSurfaceStateArgs &args) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,16 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BufferHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -500,9 +500,17 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, RenderSurfaceStateXeHPAndLaterTests, givenSpecificP
|
||||
std::unique_ptr<BufferHw<FamilyType>> buffer(static_cast<BufferHw<FamilyType> *>(
|
||||
BufferHw<FamilyType>::create(&context, {}, 0, 0, allocationSize, nullptr, nullptr, multiGraphicsAllocation, false, false, false)));
|
||||
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(&rssCmd, allocation->getGpuAddress(), allocation->getUnderlyingBufferSize(),
|
||||
buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex()), false, false, false,
|
||||
pClDevice->getNumGenericSubDevices(), allocation, pClDevice->getGmmHelper(), false, 1u);
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = &rssCmd;
|
||||
args.graphicsAddress = allocation->getGpuAddress();
|
||||
args.size = allocation->getUnderlyingBufferSize();
|
||||
args.mocs = buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex());
|
||||
args.numAvailableDevices = pClDevice->getNumGenericSubDevices();
|
||||
args.allocation = allocation;
|
||||
args.gmmHelper = pClDevice->getGmmHelper();
|
||||
args.areMultipleSubDevicesInContext = true;
|
||||
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
|
||||
|
||||
EXPECT_EQ(FamilyType::RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, rssCmd.getCoherencyType());
|
||||
}
|
||||
|
||||
@@ -15,6 +15,9 @@ set(NEO_CORE_COMMAND_CONTAINER
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_tgllp_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions/encode_surface_state_args_base.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}encode_surface_state.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}encode_surface_state_args.h
|
||||
)
|
||||
|
||||
if(SUPPORT_XEHP_AND_LATER)
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
|
||||
#include "shared/source/kernel/kernel_arg_descriptor.h"
|
||||
|
||||
#include "encode_surface_state_args.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
@@ -246,12 +248,10 @@ struct EncodeSurfaceState {
|
||||
using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
using COHERENCY_TYPE = typename R_SURFACE_STATE::COHERENCY_TYPE;
|
||||
|
||||
static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
|
||||
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
|
||||
GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, bool areMultipleSubDevicesInContext);
|
||||
static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
|
||||
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext);
|
||||
static void encodeBuffer(EncodeSurfaceStateArgs &args);
|
||||
static void encodeExtraBufferParams(EncodeSurfaceStateArgs &args);
|
||||
static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo);
|
||||
static void appendBufferSurfaceState(EncodeSurfaceStateArgs &args);
|
||||
|
||||
static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() {
|
||||
return ~(getSurfaceBaseAddressAlignment() - 1);
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
|
||||
#include "encode_surface_state.inl"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
@@ -337,21 +339,19 @@ void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
|
||||
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
|
||||
GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
|
||||
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(dst);
|
||||
UNRECOVERABLE_IF(!isAligned<getSurfaceBaseAddressAlignment()>(size));
|
||||
void EncodeSurfaceState<Family>::encodeBuffer(EncodeSurfaceStateArgs &args) {
|
||||
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
|
||||
UNRECOVERABLE_IF(!isAligned<getSurfaceBaseAddressAlignment()>(args.size));
|
||||
|
||||
SURFACE_STATE_BUFFER_LENGTH Length = {0};
|
||||
Length.Length = static_cast<uint32_t>(size - 1);
|
||||
Length.Length = static_cast<uint32_t>(args.size - 1);
|
||||
|
||||
surfaceState->setWidth(Length.SurfaceState.Width + 1);
|
||||
surfaceState->setHeight(Length.SurfaceState.Height + 1);
|
||||
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
|
||||
|
||||
surfaceState->setSurfaceType((address != 0) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER
|
||||
: R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
|
||||
surfaceState->setSurfaceType((args.graphicsAddress != 0) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER
|
||||
: R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
|
||||
surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
|
||||
surfaceState->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
|
||||
surfaceState->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
|
||||
@@ -359,25 +359,27 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_
|
||||
surfaceState->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR);
|
||||
surfaceState->setVerticalLineStride(0);
|
||||
surfaceState->setVerticalLineStrideOffset(0);
|
||||
surfaceState->setMemoryObjectControlState(mocs);
|
||||
surfaceState->setSurfaceBaseAddress(address);
|
||||
surfaceState->setMemoryObjectControlState(args.mocs);
|
||||
surfaceState->setSurfaceBaseAddress(args.graphicsAddress);
|
||||
|
||||
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
|
||||
|
||||
setCoherencyType(surfaceState, cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
|
||||
setCoherencyType(surfaceState, args.cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
|
||||
|
||||
Gmm *gmm = allocation ? allocation->getDefaultGmm() : nullptr;
|
||||
if (gmm && gmm->isCompressionEnabled && !forceNonAuxMode) {
|
||||
Gmm *gmm = args.allocation ? args.allocation->getDefaultGmm() : nullptr;
|
||||
if (gmm && gmm->isCompressionEnabled && !args.forceNonAuxMode) {
|
||||
// Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
|
||||
setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
|
||||
setBufferAuxParamsForCCS(surfaceState);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.DisableCachingForStatefulBufferAccess.get()) {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
|
||||
surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
|
||||
}
|
||||
|
||||
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices, useGlobalAtomics, areMultipleSubDevicesInContext);
|
||||
EncodeSurfaceState<Family>::encodeExtraBufferParams(args);
|
||||
|
||||
EncodeSurfaceState<Family>::appendBufferSurfaceState(args);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
||||
@@ -449,9 +449,9 @@ inline size_t EncodeWA<GfxFamily>::getAdditionalPipelineSelectSize(Device &devic
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
|
||||
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
|
||||
encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo());
|
||||
inline void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
|
||||
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
|
||||
encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -618,33 +618,33 @@ bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
|
||||
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
|
||||
Gmm *gmm = allocation ? allocation->getDefaultGmm() : nullptr;
|
||||
void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
|
||||
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
|
||||
Gmm *gmm = args.allocation ? args.allocation->getDefaultGmm() : nullptr;
|
||||
uint32_t compressionFormat = 0;
|
||||
|
||||
bool setConstCachePolicy = false;
|
||||
if (allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE) {
|
||||
if (args.allocation && args.allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE) {
|
||||
setConstCachePolicy = true;
|
||||
}
|
||||
|
||||
if (surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) &&
|
||||
if (surfaceState->getMemoryObjectControlState() == args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) &&
|
||||
DebugManager.flags.ForceL1Caching.get() != 0) {
|
||||
setConstCachePolicy = true;
|
||||
}
|
||||
|
||||
if (setConstCachePolicy == true) {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
|
||||
surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
|
||||
}
|
||||
|
||||
encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo());
|
||||
DeviceBitfield deviceBitfield{static_cast<uint32_t>(maxNBitValue(numAvailableDevices))};
|
||||
encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
|
||||
DeviceBitfield deviceBitfield{static_cast<uint32_t>(maxNBitValue(args.numAvailableDevices))};
|
||||
bool implicitScaling = ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, true);
|
||||
bool enablePartialWrites = implicitScaling;
|
||||
bool enableMultiGpuAtomics = enablePartialWrites;
|
||||
|
||||
if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
|
||||
enableMultiGpuAtomics = useGlobalAtomics && (enablePartialWrites || areMultipleSubDevicesInContext);
|
||||
enableMultiGpuAtomics = args.useGlobalAtomics && (enablePartialWrites || args.areMultipleSubDevicesInContext);
|
||||
}
|
||||
|
||||
surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
|
||||
@@ -660,7 +660,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfac
|
||||
|
||||
if (EncodeSurfaceState<Family>::isAuxModeEnabled(surfaceState, gmm)) {
|
||||
auto resourceFormat = gmm->gmmResourceInfo->getResourceFormat();
|
||||
compressionFormat = gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
|
||||
compressionFormat = args.gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
|
||||
|
||||
if (DebugManager.flags.ForceBufferCompressionFormat.get() != -1) {
|
||||
compressionFormat = DebugManager.flags.ForceBufferCompressionFormat.get();
|
||||
@@ -668,7 +668,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfac
|
||||
}
|
||||
|
||||
if (DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) {
|
||||
if (allocation && !MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())) {
|
||||
if (args.allocation && !MemoryPool::isSystemMemoryPool(args.allocation->getMemoryPool())) {
|
||||
setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
|
||||
setBufferAuxParamsForCCS(surfaceState);
|
||||
compressionFormat = DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get();
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/definitions/encode_surface_state_args_base.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Final step of EncodeSurfaceState::encodeBuffer: it is invoked after
// encodeExtraBufferParams(args). This definition has an empty body, so the
// surface state referenced by args.outMemory is not modified here.
// NOTE(review): presumably a per-branch extension point - confirm.
template <typename GfxFamily>
|
||||
void EncodeSurfaceState<GfxFamily>::appendBufferSurfaceState(EncodeSurfaceStateArgs &args) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_container/definitions/encode_surface_state_args_base.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Argument bundle handed to EncodeSurfaceState<GfxFamily>::encodeBuffer.
// It declares no members of its own here; all fields come from
// EncodeSurfaceStateArgsBase, and this type only supplies a public default
// constructor so callers can create an instance.
struct EncodeSurfaceStateArgs : EncodeSurfaceStateArgsBase {
|
||||
EncodeSurfaceStateArgs() = default;
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
#include <cstdint>
|
||||
|
||||
namespace NEO {
class GmmHelper;
class GraphicsAllocation;

// Common set of inputs for encoding a buffer RENDER_SURFACE_STATE.
//
// Every field has a neutral default, so callers only assign what they need.
// The default constructor is protected: the base cannot be instantiated on its
// own and is meant to be used through a derived argument type.
struct EncodeSurfaceStateArgsBase {
    // GPU address programmed as the surface base address; a value of 0 makes
    // encodeBuffer emit a NULL surface type instead of a buffer surface type.
    uint64_t graphicsAddress = 0ull;
    // Surface size; encodeBuffer aborts (UNRECOVERABLE_IF) when it does not
    // satisfy the surface base address alignment, and encodes `size - 1`.
    std::size_t size = 0u;

    // Destination memory; the encoder reinterprets it as a RENDER_SURFACE_STATE.
    void *outMemory = nullptr;

    // Backing allocation; may stay null, the encoder checks before using it.
    GraphicsAllocation *allocation = nullptr;
    // Dereferenced by the encoder without a null check, so it must be set
    // before calling encodeBuffer.
    GmmHelper *gmmHelper = nullptr;

    // Number of devices used to build the device bitfield for the implicit scaling check.
    uint32_t numAvailableDevices = 0u;
    // Value written as the memory object control state.
    uint32_t mocs = 0u;

    // true selects IA-coherent, false GPU-coherent coherency type.
    bool cpuCoherent = false;
    // When true, compression aux params are not applied even if the allocation's gmm has compression enabled.
    bool forceNonAuxMode = false;
    // Read by the family-specific encodeExtraBufferParams when choosing cache policy.
    bool isReadOnly = false;
    // Together with areMultipleSubDevicesInContext, feeds the multi-GPU atomics decision.
    bool useGlobalAtomics = false;
    bool areMultipleSubDevicesInContext = false;

  protected:
    EncodeSurfaceStateArgsBase() = default;
};
} // namespace NEO
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/gen12lp/hw_cmds_base.h"
|
||||
#include "shared/source/gen12lp/reg_configs.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
|
||||
using Family = NEO::TGLLPFamily;
|
||||
@@ -21,6 +22,7 @@ using Family = NEO::TGLLPFamily;
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
size_t EncodeWA<Family>::getAdditionalPipelineSelectSize(Device &device) {
|
||||
size_t size = 0;
|
||||
@@ -67,19 +69,19 @@ void EncodeWA<Family>::encodeAdditionalPipelineSelect(Device &device, LinearStre
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
|
||||
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
|
||||
const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
|
||||
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
|
||||
const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
if (isL3Allowed) {
|
||||
const bool isConstantSurface = allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE;
|
||||
bool useL1 = isReadOnly || isConstantSurface;
|
||||
const bool isConstantSurface = args.allocation && args.allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE;
|
||||
bool useL1 = args.isReadOnly || isConstantSurface;
|
||||
|
||||
if (DebugManager.flags.ForceL1Caching.get() != 1) {
|
||||
useL1 = false;
|
||||
}
|
||||
|
||||
if (useL1) {
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
|
||||
surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -194,9 +194,18 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationPr
|
||||
size_t allocSize = size;
|
||||
length.Length = static_cast<uint32_t>(allocSize - 1);
|
||||
GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1);
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
|
||||
false, false, false, 1u,
|
||||
&allocation, pDevice->getGmmHelper(), false, 1u);
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = stateBuffer;
|
||||
args.graphicsAddress = gpuAddr;
|
||||
args.size = allocSize;
|
||||
args.mocs = 1;
|
||||
args.numAvailableDevices = 1;
|
||||
args.allocation = &allocation;
|
||||
args.gmmHelper = pDevice->getGmmHelper();
|
||||
args.areMultipleSubDevicesInContext = true;
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
|
||||
|
||||
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
|
||||
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
|
||||
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
|
||||
@@ -221,9 +230,17 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNo
|
||||
size_t allocSize = size;
|
||||
length.Length = static_cast<uint32_t>(allocSize - 1);
|
||||
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
|
||||
true, false, false, 1u,
|
||||
nullptr, pDevice->getGmmHelper(), false, 1u);
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = stateBuffer;
|
||||
args.graphicsAddress = gpuAddr;
|
||||
args.size = allocSize;
|
||||
args.mocs = 1;
|
||||
args.cpuCoherent = true;
|
||||
args.numAvailableDevices = 1;
|
||||
args.gmmHelper = pDevice->getGmmHelper();
|
||||
args.areMultipleSubDevicesInContext = true;
|
||||
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
|
||||
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType());
|
||||
EXPECT_EQ(UnitTestHelper<FamilyType>::getCoherencyTypeSupported(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT), state->getCoherencyType());
|
||||
@@ -247,9 +264,15 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency
|
||||
size_t allocSize = size;
|
||||
length.Length = static_cast<uint32_t>(allocSize - 1);
|
||||
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
|
||||
false, false, false, 1u,
|
||||
nullptr, pDevice->getGmmHelper(), false, 1u);
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = stateBuffer;
|
||||
args.graphicsAddress = gpuAddr;
|
||||
args.size = allocSize;
|
||||
args.mocs = 1;
|
||||
args.numAvailableDevices = 1;
|
||||
args.gmmHelper = pDevice->getGmmHelper();
|
||||
args.areMultipleSubDevicesInContext = true;
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
|
||||
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user