Refactor creation of buffer surface state 1/n

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-10-21 01:30:53 +00:00
committed by Compute-Runtime-Automation
parent 53d77bb801
commit 9d56939980
20 changed files with 236 additions and 97 deletions

View File

@@ -702,6 +702,7 @@ include_directories(${NEO_BUILD_DIR})
include_directories(${NEO_SOURCE_DIR})
include_directories(${NEO_SHARED_DIRECTORY}/aub_mem_dump/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/built_ins/builtinops${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/command_container/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/command_stream/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/compiler_interface/compiler_options${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/debug_settings/definitions${BRANCH_DIR_SUFFIX})

View File

@@ -169,13 +169,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
if (neoDevice->getDebugger()) {
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto debugSurface = device->getDebugSurface();
auto mocs = device->getMOCS(false, false);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, debugSurface->getGpuAddress(),
debugSurface->getUnderlyingBufferSize(), mocs,
false, false, false, neoDevice->getNumGenericSubDevices(),
debugSurface, neoDevice->getGmmHelper(), kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = device->getDebugSurface()->getGpuAddress();
args.size = device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.useGlobalAtomics = kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics;
args.areMultipleSubDevicesInContext = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}

View File

@@ -284,14 +284,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
if (neoDevice->getDebugger()) {
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto debugSurface = device->getDebugSurface();
auto mocs = device->getMOCS(false, false);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, debugSurface->getGpuAddress(),
debugSurface->getUnderlyingBufferSize(), mocs,
false, false, false, neoDevice->getNumGenericSubDevices(),
debugSurface, neoDevice->getGmmHelper(),
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, 1u);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = device->getDebugSurface()->getGpuAddress();
args.size = device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.useGlobalAtomics = kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics;
args.areMultipleSubDevicesInContext = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
}
// Attach Function residency to our CommandList residency

View File

@@ -66,13 +66,20 @@ struct KernelHw : public KernelImp {
if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
l3Enabled = false;
}
auto mocs = this->module->getDevice()->getMOCS(l3Enabled, false);
NEO::Device *neoDevice = module->getDevice()->getNEODevice();
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(&surfaceState, bufferAddressForSsh, bufferSizeForSsh, mocs,
false, false, false, neoDevice->getNumGenericSubDevices(),
alloc, neoDevice->getGmmHelper(),
kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &surfaceState;
args.graphicsAddress = bufferAddressForSsh;
args.size = bufferSizeForSsh;
args.mocs = this->module->getDevice()->getMOCS(l3Enabled, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = alloc;
args.gmmHelper = neoDevice->getGmmHelper();
args.useGlobalAtomics = kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics;
args.areMultipleSubDevicesInContext = true;
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress) = surfaceState;
}

View File

@@ -24,7 +24,7 @@ set(RUNTIME_SRCS_MEM_OBJ
${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}buffer_ext.inl
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}buffer_ext.cpp
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}image_ext.inl
)

View File

@@ -19,10 +19,11 @@
#include <functional>
namespace NEO {
class Device;
class Buffer;
class ClDevice;
class Device;
class MemoryManager;
struct EncodeSurfaceStateArgs;
using BufferCreatFunc = Buffer *(*)(Context *context,
MemoryProperties memoryProperties,
@@ -193,6 +194,8 @@ class Buffer : public MemObj {
static bool isReadOnlyMemoryPermittedByFlags(const MemoryProperties &properties);
void transferData(void *dst, void *src, size_t copySize, size_t copyOffset);
void appendSurfaceStateArgs(EncodeSurfaceStateArgs &args);
};
template <typename GfxFamily>
@@ -214,7 +217,6 @@ class BufferHw : public Buffer {
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) override;
void appendSurfaceStateExt(void *memory);
static Buffer *create(Context *context,
MemoryProperties memoryProperties,

View File

@@ -19,7 +19,6 @@
#include "opencl/source/helpers/surface_formats.h"
#include "opencl/source/mem_obj/buffer.h"
#include "buffer_ext.inl"
#include "hw_cmds.h"
namespace NEO {
@@ -39,11 +38,21 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
auto rootDeviceIndex = device.getRootDeviceIndex();
auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
EncodeSurfaceState<GfxFamily>::encodeBuffer(memory, getBufferAddress(rootDeviceIndex),
getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex),
getMocsValue(disableL3, isReadOnly, rootDeviceIndex),
true, forceNonAuxMode, isReadOnly, device.getNumGenericSubDevices(),
graphicsAllocation, device.getGmmHelper(), useGlobalAtomics, areMultipleSubDevicesInContext);
appendSurfaceStateExt(memory);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = memory;
args.graphicsAddress = getBufferAddress(rootDeviceIndex);
args.size = getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex);
args.mocs = getMocsValue(disableL3, isReadOnly, rootDeviceIndex);
args.cpuCoherent = true;
args.forceNonAuxMode = forceNonAuxMode;
args.isReadOnly = isReadOnly;
args.numAvailableDevices = device.getNumGenericSubDevices();
args.allocation = graphicsAllocation;
args.gmmHelper = device.getGmmHelper();
args.useGlobalAtomics = useGlobalAtomics;
args.areMultipleSubDevicesInContext = areMultipleSubDevicesInContext;
appendSurfaceStateArgs(args);
EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
}
} // namespace NEO

View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/mem_obj/buffer.h"
#include "encode_surface_state_args.h"
namespace NEO {
// Hook that lets Buffer adjust the surface state arguments: BufferHw::setArgStateful calls it
// after filling `args` and before handing them to EncodeSurfaceState::encodeBuffer.
// This variant makes no adjustments and leaves `args` untouched.
void Buffer::appendSurfaceStateArgs(EncodeSurfaceStateArgs &args) {
}
} // namespace NEO

View File

@@ -1,16 +0,0 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/mem_obj/buffer.h"
namespace NEO {
// Extension hook that receives the raw surface state memory written by setArgStateful.
// The body is empty in this variant, so the surface state is left as encoded.
template <typename GfxFamily>
void BufferHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
} // namespace NEO

View File

@@ -500,9 +500,17 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, RenderSurfaceStateXeHPAndLaterTests, givenSpecificP
std::unique_ptr<BufferHw<FamilyType>> buffer(static_cast<BufferHw<FamilyType> *>(
BufferHw<FamilyType>::create(&context, {}, 0, 0, allocationSize, nullptr, nullptr, multiGraphicsAllocation, false, false, false)));
EncodeSurfaceState<FamilyType>::encodeBuffer(&rssCmd, allocation->getGpuAddress(), allocation->getUnderlyingBufferSize(),
buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex()), false, false, false,
pClDevice->getNumGenericSubDevices(), allocation, pClDevice->getGmmHelper(), false, 1u);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = &rssCmd;
args.graphicsAddress = allocation->getGpuAddress();
args.size = allocation->getUnderlyingBufferSize();
args.mocs = buffer->getMocsValue(false, false, pClDevice->getRootDeviceIndex());
args.numAvailableDevices = pClDevice->getNumGenericSubDevices();
args.allocation = allocation;
args.gmmHelper = pClDevice->getGmmHelper();
args.areMultipleSubDevicesInContext = true;
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
EXPECT_EQ(FamilyType::RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, rssCmd.getCoherencyType());
}

View File

@@ -15,6 +15,9 @@ set(NEO_CORE_COMMAND_CONTAINER
${CMAKE_CURRENT_SOURCE_DIR}/encode_compute_mode_tgllp_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling.cpp
${CMAKE_CURRENT_SOURCE_DIR}/implicit_scaling.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions/encode_surface_state_args_base.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}encode_surface_state.inl
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}encode_surface_state_args.h
)
if(SUPPORT_XEHP_AND_LATER)

View File

@@ -16,6 +16,8 @@
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
#include "shared/source/kernel/kernel_arg_descriptor.h"
#include "encode_surface_state_args.h"
#include <algorithm>
namespace NEO {
@@ -246,12 +248,10 @@ struct EncodeSurfaceState {
using AUXILIARY_SURFACE_MODE = typename R_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
using COHERENCY_TYPE = typename R_SURFACE_STATE::COHERENCY_TYPE;
static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, bool areMultipleSubDevicesInContext);
static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext);
static void encodeBuffer(EncodeSurfaceStateArgs &args);
static void encodeExtraBufferParams(EncodeSurfaceStateArgs &args);
static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo);
static void appendBufferSurfaceState(EncodeSurfaceStateArgs &args);
static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() {
return ~(getSurfaceBaseAddressAlignment() - 1);

View File

@@ -26,6 +26,8 @@
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "encode_surface_state.inl"
#include <algorithm>
namespace NEO {
@@ -337,21 +339,19 @@ void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_
}
template <typename Family>
void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(dst);
UNRECOVERABLE_IF(!isAligned<getSurfaceBaseAddressAlignment()>(size));
void EncodeSurfaceState<Family>::encodeBuffer(EncodeSurfaceStateArgs &args) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
UNRECOVERABLE_IF(!isAligned<getSurfaceBaseAddressAlignment()>(args.size));
SURFACE_STATE_BUFFER_LENGTH Length = {0};
Length.Length = static_cast<uint32_t>(size - 1);
Length.Length = static_cast<uint32_t>(args.size - 1);
surfaceState->setWidth(Length.SurfaceState.Width + 1);
surfaceState->setHeight(Length.SurfaceState.Height + 1);
surfaceState->setDepth(Length.SurfaceState.Depth + 1);
surfaceState->setSurfaceType((address != 0) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER
: R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
surfaceState->setSurfaceType((args.graphicsAddress != 0) ? R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER
: R_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
surfaceState->setSurfaceVerticalAlignment(R_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
surfaceState->setSurfaceHorizontalAlignment(R_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
@@ -359,25 +359,27 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_
surfaceState->setTileMode(R_SURFACE_STATE::TILE_MODE_LINEAR);
surfaceState->setVerticalLineStride(0);
surfaceState->setVerticalLineStrideOffset(0);
surfaceState->setMemoryObjectControlState(mocs);
surfaceState->setSurfaceBaseAddress(address);
surfaceState->setMemoryObjectControlState(args.mocs);
surfaceState->setSurfaceBaseAddress(args.graphicsAddress);
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
setCoherencyType(surfaceState, cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
setCoherencyType(surfaceState, args.cpuCoherent ? R_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT : R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
Gmm *gmm = allocation ? allocation->getDefaultGmm() : nullptr;
if (gmm && gmm->isCompressionEnabled && !forceNonAuxMode) {
Gmm *gmm = args.allocation ? args.allocation->getDefaultGmm() : nullptr;
if (gmm && gmm->isCompressionEnabled && !args.forceNonAuxMode) {
// Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
setBufferAuxParamsForCCS(surfaceState);
}
if (DebugManager.flags.DisableCachingForStatefulBufferAccess.get()) {
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
}
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices, useGlobalAtomics, areMultipleSubDevicesInContext);
EncodeSurfaceState<Family>::encodeExtraBufferParams(args);
EncodeSurfaceState<Family>::appendBufferSurfaceState(args);
}
template <typename Family>

View File

@@ -449,9 +449,9 @@ inline size_t EncodeWA<GfxFamily>::getAdditionalPipelineSelectSize(Device &devic
}
// Extra buffer programming for this hardware family: the only additional step is
// encodeExtraCacheSettings, fed with the surface state and the hardware info obtained
// from the GmmHelper. The GmmHelper pointer is dereferenced unconditionally, so it must be non-null.
template <typename GfxFamily>
inline void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo());
inline void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
}
template <typename GfxFamily>

View File

@@ -618,33 +618,33 @@ bool EncodeSurfaceState<Family>::doBindingTablePrefetch() {
}
template <typename Family>
void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
Gmm *gmm = allocation ? allocation->getDefaultGmm() : nullptr;
void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
Gmm *gmm = args.allocation ? args.allocation->getDefaultGmm() : nullptr;
uint32_t compressionFormat = 0;
bool setConstCachePolicy = false;
if (allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE) {
if (args.allocation && args.allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE) {
setConstCachePolicy = true;
}
if (surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) &&
if (surfaceState->getMemoryObjectControlState() == args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) &&
DebugManager.flags.ForceL1Caching.get() != 0) {
setConstCachePolicy = true;
}
if (setConstCachePolicy == true) {
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
}
encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo());
DeviceBitfield deviceBitfield{static_cast<uint32_t>(maxNBitValue(numAvailableDevices))};
encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
DeviceBitfield deviceBitfield{static_cast<uint32_t>(maxNBitValue(args.numAvailableDevices))};
bool implicitScaling = ImplicitScalingHelper::isImplicitScalingEnabled(deviceBitfield, true);
bool enablePartialWrites = implicitScaling;
bool enableMultiGpuAtomics = enablePartialWrites;
if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
enableMultiGpuAtomics = useGlobalAtomics && (enablePartialWrites || areMultipleSubDevicesInContext);
enableMultiGpuAtomics = args.useGlobalAtomics && (enablePartialWrites || args.areMultipleSubDevicesInContext);
}
surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
@@ -660,7 +660,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfac
if (EncodeSurfaceState<Family>::isAuxModeEnabled(surfaceState, gmm)) {
auto resourceFormat = gmm->gmmResourceInfo->getResourceFormat();
compressionFormat = gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
compressionFormat = args.gmmHelper->getClientContext()->getSurfaceStateCompressionFormat(resourceFormat);
if (DebugManager.flags.ForceBufferCompressionFormat.get() != -1) {
compressionFormat = DebugManager.flags.ForceBufferCompressionFormat.get();
@@ -668,7 +668,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfac
}
if (DebugManager.flags.EnableStatelessCompressionWithUnifiedMemory.get()) {
if (allocation && !MemoryPool::isSystemMemoryPool(allocation->getMemoryPool())) {
if (args.allocation && !MemoryPool::isSystemMemoryPool(args.allocation->getMemoryPool())) {
setCoherencyType(surfaceState, R_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
setBufferAuxParamsForCCS(surfaceState);
compressionFormat = DebugManager.flags.FormatForStatelessCompressionWithUnifiedMemory.get();

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/definitions/encode_surface_state_args_base.h"
namespace NEO {
// Last step of EncodeSurfaceState::encodeBuffer, invoked right after encodeExtraBufferParams.
// This variant appends nothing to the surface state and leaves `args` untouched.
template <typename GfxFamily>
void EncodeSurfaceState<GfxFamily>::appendBufferSurfaceState(EncodeSurfaceStateArgs &args) {
}
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_container/definitions/encode_surface_state_args_base.h"
namespace NEO {
// Argument bundle accepted by EncodeSurfaceState<Family>::encodeBuffer.
// This variant adds no fields on top of EncodeSurfaceStateArgsBase; its public default
// constructor is what allows callers to create the bundle, since the base class
// constructor is protected.
struct EncodeSurfaceStateArgs : EncodeSurfaceStateArgsBase {
EncodeSurfaceStateArgs() = default;
};
} // namespace NEO

View File

@@ -0,0 +1,37 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstddef>
#include <cstdint>
namespace NEO {
class GmmHelper;
class GraphicsAllocation;

// Common set of inputs consumed by EncodeSurfaceState<Family>::encodeBuffer and its helpers.
// Every field has a neutral default, so call sites only assign the values they care about.
// The constructor is protected: instances are created through a derived type
// (EncodeSurfaceStateArgs), never directly.
struct EncodeSurfaceStateArgsBase {
    // Address programmed as the surface base address; 0 makes encodeBuffer emit a NULL surface type.
    uint64_t graphicsAddress = 0ull;
    // Surface size; encodeBuffer aborts (UNRECOVERABLE_IF) unless it is aligned to
    // getSurfaceBaseAddressAlignment(). Spelled std::size_t so that the header only relies
    // on what <cstddef> guarantees.
    std::size_t size = 0u;
    // Destination memory; encodeBuffer reinterprets it as the family's RENDER_SURFACE_STATE.
    void *outMemory = nullptr;
    // Backing allocation; may stay nullptr (encodeBuffer checks it before use).
    GraphicsAllocation *allocation = nullptr;
    // Dereferenced without a null check by the encoders, so it must be set by the caller.
    GmmHelper *gmmHelper = nullptr;
    // Number of devices used to build the device bitfield for the implicit scaling check.
    uint32_t numAvailableDevices = 0u;
    // Value written as the memory object control state.
    uint32_t mocs = 0u;
    // true selects IA-coherent, false GPU-coherent coherency type.
    bool cpuCoherent = false;
    // true skips the CCS aux programming even when the allocation's Gmm reports compression.
    bool forceNonAuxMode = false;
    // Consulted by the Gen12LP encodeExtraBufferParams when deciding on the L1 (CONST) MOCS.
    bool isReadOnly = false;
    // Together with areMultipleSubDevicesInContext drives the multi-GPU atomics setting
    // when EnableMultiGpuAtomicsOptimization is on.
    bool useGlobalAtomics = false;
    bool areMultipleSubDevicesInContext = false;

  protected:
    EncodeSurfaceStateArgsBase() = default;
};
} // namespace NEO

View File

@@ -9,6 +9,7 @@
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/gen12lp/hw_cmds_base.h"
#include "shared/source/gen12lp/reg_configs.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/preamble.h"
using Family = NEO::TGLLPFamily;
@@ -21,6 +22,7 @@ using Family = NEO::TGLLPFamily;
#include "shared/source/command_stream/command_stream_receiver.h"
namespace NEO {
template <>
size_t EncodeWA<Family>::getAdditionalPipelineSelectSize(Device &device) {
size_t size = 0;
@@ -67,19 +69,19 @@ void EncodeWA<Family>::encodeAdditionalPipelineSelect(Device &device, LinearStre
}
// TGLLPFamily specialization of the extra buffer programming.
// L3 counts as allowed when the surface state's current MOCS equals the OCL_BUFFER usage MOCS.
// In that case read-only arguments and CONSTANT_SURFACE allocations are switched to the
// OCL_BUFFER_CONST MOCS, but only when the ForceL1Caching debug flag equals 1;
// any other flag value leaves the MOCS unchanged.
template <>
void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) {
const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs &args) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
if (isL3Allowed) {
const bool isConstantSurface = allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE;
bool useL1 = isReadOnly || isConstantSurface;
const bool isConstantSurface = args.allocation && args.allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE;
bool useL1 = args.isReadOnly || isConstantSurface;
if (DebugManager.flags.ForceL1Caching.get() != 1) {
useL1 = false;
}
if (useL1) {
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
surfaceState->setMemoryObjectControlState(args.gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST));
}
}
}

View File

@@ -194,9 +194,18 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationPr
size_t allocSize = size;
length.Length = static_cast<uint32_t>(allocSize - 1);
GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1);
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
false, false, false, 1u,
&allocation, pDevice->getGmmHelper(), false, 1u);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = stateBuffer;
args.graphicsAddress = gpuAddr;
args.size = allocSize;
args.mocs = 1;
args.numAvailableDevices = 1;
args.allocation = &allocation;
args.gmmHelper = pDevice->getGmmHelper();
args.areMultipleSubDevicesInContext = true;
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
@@ -221,9 +230,17 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNo
size_t allocSize = size;
length.Length = static_cast<uint32_t>(allocSize - 1);
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
true, false, false, 1u,
nullptr, pDevice->getGmmHelper(), false, 1u);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = stateBuffer;
args.graphicsAddress = gpuAddr;
args.size = allocSize;
args.mocs = 1;
args.cpuCoherent = true;
args.numAvailableDevices = 1;
args.gmmHelper = pDevice->getGmmHelper();
args.areMultipleSubDevicesInContext = true;
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType());
EXPECT_EQ(UnitTestHelper<FamilyType>::getCoherencyTypeSupported(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT), state->getCoherencyType());
@@ -247,9 +264,15 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency
size_t allocSize = size;
length.Length = static_cast<uint32_t>(allocSize - 1);
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
false, false, false, 1u,
nullptr, pDevice->getGmmHelper(), false, 1u);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = stateBuffer;
args.graphicsAddress = gpuAddr;
args.size = allocSize;
args.mocs = 1;
args.numAvailableDevices = 1;
args.gmmHelper = pDevice->getGmmHelper();
args.areMultipleSubDevicesInContext = true;
EncodeSurfaceState<FamilyType>::encodeBuffer(args);
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());