Unify surface state programming logic related to implicit scaling

OCL image surface state programming for the Xe HP core now reuses the logic
of the EncodeSurfaceState helper

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-01-24 15:42:16 +00:00
committed by Compute-Runtime-Automation
parent 52d636394c
commit 5e238dc7f1
9 changed files with 52 additions and 60 deletions

View File

@ -17,25 +17,12 @@ static auto gfxCore = IGFX_XE_HP_CORE;
// NOTE(review): this span is a rendered commit diff with the +/- markers
// stripped, so removed (pre-change) and added (post-change) lines appear
// interleaved below. It is not compilable as shown.
template <>
void ImageHw<Family>::appendSurfaceStateParams(Family::RENDER_SURFACE_STATE *surfaceState, uint32_t rootDeviceIndex, bool useGlobalAtomics) {
// (presumably removed) the added code below no longer reads the context type
// directly — TODO confirm against the repository:
auto imageCtxType = this->context->peekContextType();
// (removed) legacy in-place programming of the multi-GPU atomics /
// partial-writes bits, including the debug-flag overrides:
bool enableMultiGpuPartialWrites = (imageCtxType != ContextType::CONTEXT_TYPE_SPECIALIZED) && (context->containsMultipleSubDevices(rootDeviceIndex));
bool enableMultiGpuAtomics = enableMultiGpuPartialWrites;
if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
enableMultiGpuAtomics &= useGlobalAtomics;
}
surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
surfaceState->setDisableSupportForMultiGpuPartialWrites(!enableMultiGpuPartialWrites);
if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
}
if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
}
// (added) replacement: build the argument struct and delegate to the shared
// EncodeSurfaceState helper so implicit-scaling programming lives in one
// place (this is the unification described in the commit message).
EncodeSurfaceStateArgs args{};
args.outMemory = surfaceState;
args.useGlobalAtomics = useGlobalAtomics;
args.areMultipleSubDevicesInContext = context->containsMultipleSubDevices(rootDeviceIndex);
args.implicitScaling = args.areMultipleSubDevicesInContext;
EncodeSurfaceState<Family>::encodeImplicitScalingParams(args);
}
} // namespace NEO
#include "opencl/source/mem_obj/image_tgllp_and_later.inl"

View File

@ -18,7 +18,6 @@
#include "opencl/test/unit_test/mocks/mock_platform.h"
using XeHpSdvImageTests = ::testing::Test;
using isXePlatform = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
XEHPTEST_F(XeHpSdvImageTests, givenContextTypeDefaultWhenImageIsWritableAndOnlyOneTileIsAvailableThenRemainFlagsToTrue) {
DebugManagerStateRestore restorer;
@ -247,27 +246,30 @@ XEHPTEST_F(XeHpSdvImageTests, givenContextTypeSpecializedWhenImageIsWritableThen
}
// NOTE(review): rendered diff with +/- markers stripped — removed and added
// lines are interleaved below; the hunk would not compile as shown.
struct MultiGpuGlobalAtomicsImageTest : public XeHpSdvImageTests,
// (removed) old tuple carried a ContextType parameter:
public ::testing::WithParamInterface<std::tuple<unsigned int, unsigned int, ContextType, bool, bool>> {
// (added) new tuple drops ContextType; sub-device count drives the test:
public ::testing::WithParamInterface<std::tuple<unsigned int, unsigned int, bool, bool>> {
};
XEHPTEST_P(MultiGpuGlobalAtomicsImageTest, givenAppendSurfaceStateParamCalledThenDisableSupportForMultiGpuAtomicsIsSetCorrectly) {
// (removed) old parameter unpacking, including contextType:
unsigned int numAvailableDevices, memFlags;
ContextType contextType;
// (added) new unpacking keyed on the number of sub-devices:
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
unsigned int numAvailableSubDevices, memFlags;
bool useGlobalAtomics, enableMultiGpuAtomicsOptimization;
std::tie(numAvailableDevices, memFlags, contextType, useGlobalAtomics, enableMultiGpuAtomicsOptimization) = GetParam();
std::tie(numAvailableSubDevices, memFlags, useGlobalAtomics, enableMultiGpuAtomicsOptimization) = GetParam();
DebugManagerStateRestore restorer;
DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization);
// (removed) old setup built a platform with N sub-devices and asserted on it:
DebugManager.flags.CreateMultipleSubDevices.set(numAvailableDevices);
initPlatform();
if (numAvailableDevices == 1) {
EXPECT_EQ(0u, platform()->getClDevice(0)->getNumGenericSubDevices());
} else {
EXPECT_EQ(numAvailableDevices, platform()->getClDevice(0)->getNumGenericSubDevices());
// (added) new setup builds the device vector explicitly from a ULT factory:
UltClDeviceFactory deviceFactory{1, 2};
ClDeviceVector deviceVector;
for (auto i = 0u; i < numAvailableSubDevices; i++) {
deviceVector.push_back(deviceFactory.subDevices[i]);
}
// (removed) old context creation keyed on contextType:
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
MockContext context(platform()->getClDevice(0));
context.contextType = contextType;
// (added) 0 sub-devices falls back to the root device:
if (deviceVector.empty()) {
deviceVector.push_back(deviceFactory.rootDevices[0]);
}
MockContext context(deviceVector);
cl_int retVal = CL_SUCCESS;
cl_image_format imageFormat = {};
@ -294,23 +296,21 @@ XEHPTEST_P(MultiGpuGlobalAtomicsImageTest, givenAppendSurfaceStateParamCalledThe
surfaceState.setDisableSupportForMultiGpuPartialWrites(false);
imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), useGlobalAtomics);
// (removed) old expectation depended on context type and device count:
bool enableGlobalAtomics = (contextType != ContextType::CONTEXT_TYPE_SPECIALIZED) && (numAvailableDevices > 1);
// (added) new expectation depends only on the sub-device count:
bool enableGlobalAtomics = numAvailableSubDevices != 1u;
if (enableMultiGpuAtomicsOptimization) {
enableGlobalAtomics &= useGlobalAtomics;
}
EXPECT_EQ(!enableGlobalAtomics, surfaceState.getDisableSupportForMultiGpuAtomics());
}
// (removed) old parameter axis {1, 2} devices; (added) new axis {0, 1, 2}
// sub-devices — 0 meaning "root device only" per the fallback above:
static unsigned int numAvailableDevicesForMultiGpuGlobalAtomicsImageTest[] = {1, 2};
static unsigned int numAvailableSubDevicesForMultiGpuGlobalAtomicsImageTest[] = {0, 1, 2};
static unsigned int memFlags[] = {CL_MEM_READ_ONLY, CL_MEM_READ_WRITE};
// (removed) the ContextType axis is dropped entirely:
static ContextType contextTypes[] = {ContextType::CONTEXT_TYPE_DEFAULT, ContextType::CONTEXT_TYPE_SPECIALIZED, ContextType::CONTEXT_TYPE_UNRESTRICTIVE};
INSTANTIATE_TEST_CASE_P(MultiGpuGlobalAtomicsImageTest,
MultiGpuGlobalAtomicsImageTest,
::testing::Combine(
::testing::ValuesIn(numAvailableSubDevicesForMultiGpuGlobalAtomicsImageTest),
::testing::ValuesIn(memFlags),
::testing::ValuesIn(contextTypes),
::testing::Bool(),
::testing::Bool()));

View File

@ -257,6 +257,7 @@ struct EncodeSurfaceState {
static void encodeBuffer(EncodeSurfaceStateArgs &args);
static void encodeExtraBufferParams(EncodeSurfaceStateArgs &args);
static void encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args);
static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo);
static void appendBufferSurfaceState(EncodeSurfaceStateArgs &args);

View File

@ -478,6 +478,9 @@ template <typename Family>
void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) {
}
// Base-template default: implicit-scaling surface-state programming is a
// no-op. Platforms that program the multi-GPU bits (the XE_HP core per this
// commit) provide an explicit specialization instead.
template <typename Family>
void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {}
template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {

View File

@ -649,25 +649,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs
encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
if constexpr (Family::isUsingMultiGpuProgrammingInSurfaceState) {
bool enablePartialWrites = args.implicitScaling;
bool enableMultiGpuAtomics = enablePartialWrites;
if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
enableMultiGpuAtomics = args.useGlobalAtomics && (enablePartialWrites || args.areMultipleSubDevicesInContext);
}
surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
surfaceState->setDisableSupportForMultiGpuPartialWrites(!enablePartialWrites);
if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
}
if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
}
}
encodeImplicitScalingParams(args);
if (EncodeSurfaceState<Family>::isAuxModeEnabled(surfaceState, gmm)) {
auto resourceFormat = gmm->gmmResourceInfo->getResourceFormat();

View File

@ -31,6 +31,28 @@ template <>
inline void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) {
}
// Platform specialization (XE_HP family, per the commit message): programs
// the multi-GPU atomics / partial-writes bits of RENDER_SURFACE_STATE from
// the implicit-scaling arguments.
template <>
void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
// Partial writes are enabled exactly when implicit scaling is active;
// atomics follow partial writes by default.
bool enablePartialWrites = args.implicitScaling;
bool enableMultiGpuAtomics = enablePartialWrites;
// Optional optimization: only enable multi-GPU atomics when the dispatch
// actually uses global atomics (or multiple sub-devices share the context).
if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
enableMultiGpuAtomics = args.useGlobalAtomics && (enablePartialWrites || args.areMultipleSubDevicesInContext);
}
// Hardware exposes these as "disable" bits, hence the negation.
surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
surfaceState->setDisableSupportForMultiGpuPartialWrites(!enablePartialWrites);
// Force* debug overrides are applied last so they win over the computed
// values (-1 means the flag is unset).
if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
}
if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
}
}
// Intentionally empty hook: this platform appends no additional
// INTERFACE_DESCRIPTOR_DATA fields.
template <>
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
}

View File

@ -31,7 +31,6 @@ struct XeHpCore {
static constexpr bool isUsingMediaSamplerDopClockGate = true;
static constexpr bool supportsSampler = true;
static constexpr bool isUsingGenericMediaStateClear = true;
static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = true;
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
union {

View File

@ -37,7 +37,6 @@ struct XE_HPC_CORE {
static constexpr bool isUsingMediaSamplerDopClockGate = false;
static constexpr bool supportsSampler = false;
static constexpr bool isUsingGenericMediaStateClear = true;
static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = false;
static bool isXlA0(const HardwareInfo &hwInfo) {
auto revId = hwInfo.platform.usRevId & pvcSteppingBits;

View File

@ -31,7 +31,6 @@ struct XE_HPG_CORE {
static constexpr bool isUsingMediaSamplerDopClockGate = false;
static constexpr bool supportsSampler = true;
static constexpr bool isUsingGenericMediaStateClear = true;
static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = false;
struct DataPortBindlessSurfaceExtendedMessageDescriptor {
union {