Mirror of https://github.com/intel/compute-runtime.git, synced 2025-09-15 13:01:45 +08:00
Unify surface state programming logic related to implicit scaling

OCL image surface state programming for the XE_HP core now reuses the EncodeSurfaceState helper logic.

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
committed by Compute-Runtime-Automation
parent 52d636394c
commit 5e238dc7f1
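For orientation, the change moves the multi-GPU atomics / partial-writes decision out of the OCL image path and into the shared EncodeSurfaceState<Family>::encodeImplicitScalingParams helper, which the image code now calls with a filled EncodeSurfaceStateArgs. Below is a minimal, self-contained sketch of that decision logic; the SurfaceStateStub and ArgsStub types, the encodeImplicitScalingParamsStub function, and the main() driver are illustrative stand-ins rather than NEO code, and the real helper additionally honors the ForceMultiGpuAtomics / ForceMultiGpuPartialWrites debug overrides shown in the diff.

// Illustrative stand-ins only; the real types are RENDER_SURFACE_STATE and
// EncodeSurfaceStateArgs in the NEO driver.
#include <cstdio>

struct SurfaceStateStub {                 // stands in for RENDER_SURFACE_STATE
    bool disableMultiGpuAtomics = false;
    bool disableMultiGpuPartialWrites = false;
};

struct ArgsStub {                         // stands in for EncodeSurfaceStateArgs
    SurfaceStateStub *outMemory = nullptr;
    bool implicitScaling = false;
    bool useGlobalAtomics = false;
    bool areMultipleSubDevicesInContext = false;
    bool enableMultiGpuAtomicsOptimization = false; // models the debug flag
};

// Mirrors the decision logic of the XE_HP specialization of
// encodeImplicitScalingParams introduced by this commit.
void encodeImplicitScalingParamsStub(const ArgsStub &args) {
    bool enablePartialWrites = args.implicitScaling;
    bool enableMultiGpuAtomics = enablePartialWrites;

    if (args.enableMultiGpuAtomicsOptimization) {
        enableMultiGpuAtomics = args.useGlobalAtomics &&
                                (enablePartialWrites || args.areMultipleSubDevicesInContext);
    }

    // Surface state stores the inverted "disable support" bits.
    args.outMemory->disableMultiGpuAtomics = !enableMultiGpuAtomics;
    args.outMemory->disableMultiGpuPartialWrites = !enablePartialWrites;
}

int main() {
    SurfaceStateStub surfaceState{};
    ArgsStub args{};
    args.outMemory = &surfaceState;
    // The image path now fills the args from its context, as in the new
    // ImageHw<Family>::appendSurfaceStateParams body:
    args.areMultipleSubDevicesInContext = true;
    args.implicitScaling = args.areMultipleSubDevicesInContext;
    args.useGlobalAtomics = false;

    encodeImplicitScalingParamsStub(args);
    std::printf("disableAtomics=%d disablePartialWrites=%d\n",
                surfaceState.disableMultiGpuAtomics,
                surfaceState.disableMultiGpuPartialWrites);
    return 0;
}

With the helper specialized per family, the per-family constexpr flag isUsingMultiGpuProgrammingInSurfaceState is no longer needed: families without the feature get the default empty template, and XE_HP gets the specialization shown in the hunks below.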
@@ -17,25 +17,12 @@ static auto gfxCore = IGFX_XE_HP_CORE;
 
 template <>
 void ImageHw<Family>::appendSurfaceStateParams(Family::RENDER_SURFACE_STATE *surfaceState, uint32_t rootDeviceIndex, bool useGlobalAtomics) {
-    auto imageCtxType = this->context->peekContextType();
-
-    bool enableMultiGpuPartialWrites = (imageCtxType != ContextType::CONTEXT_TYPE_SPECIALIZED) && (context->containsMultipleSubDevices(rootDeviceIndex));
-    bool enableMultiGpuAtomics = enableMultiGpuPartialWrites;
-
-    if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
-        enableMultiGpuAtomics &= useGlobalAtomics;
-    }
-
-    surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
-    surfaceState->setDisableSupportForMultiGpuPartialWrites(!enableMultiGpuPartialWrites);
-
-    if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
-        surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
-    }
-
-    if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
-        surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
-    }
+    EncodeSurfaceStateArgs args{};
+    args.outMemory = surfaceState;
+    args.useGlobalAtomics = useGlobalAtomics;
+    args.areMultipleSubDevicesInContext = context->containsMultipleSubDevices(rootDeviceIndex);
+    args.implicitScaling = args.areMultipleSubDevicesInContext;
+    EncodeSurfaceState<Family>::encodeImplicitScalingParams(args);
 }
 } // namespace NEO
 #include "opencl/source/mem_obj/image_tgllp_and_later.inl"
@@ -18,7 +18,6 @@
 #include "opencl/test/unit_test/mocks/mock_platform.h"
 
 using XeHpSdvImageTests = ::testing::Test;
-using isXePlatform = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE_HPC_CORE>;
 
 XEHPTEST_F(XeHpSdvImageTests, givenContextTypeDefaultWhenImageIsWritableAndOnlyOneTileIsAvailableThenRemainFlagsToTrue) {
     DebugManagerStateRestore restorer;
@@ -247,27 +246,30 @@ XEHPTEST_F(XeHpSdvImageTests, givenContextTypeSpecializedWhenImageIsWritableThen
 }
 
 struct MultiGpuGlobalAtomicsImageTest : public XeHpSdvImageTests,
-                                        public ::testing::WithParamInterface<std::tuple<unsigned int, unsigned int, ContextType, bool, bool>> {
+                                        public ::testing::WithParamInterface<std::tuple<unsigned int, unsigned int, bool, bool>> {
 };
 
 XEHPTEST_P(MultiGpuGlobalAtomicsImageTest, givenAppendSurfaceStateParamCalledThenDisableSupportForMultiGpuAtomicsIsSetCorrectly) {
-    unsigned int numAvailableDevices, memFlags;
-    ContextType contextType;
+    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
+    unsigned int numAvailableSubDevices, memFlags;
     bool useGlobalAtomics, enableMultiGpuAtomicsOptimization;
-    std::tie(numAvailableDevices, memFlags, contextType, useGlobalAtomics, enableMultiGpuAtomicsOptimization) = GetParam();
+    std::tie(numAvailableSubDevices, memFlags, useGlobalAtomics, enableMultiGpuAtomicsOptimization) = GetParam();
 
     DebugManagerStateRestore restorer;
     DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization);
-    DebugManager.flags.CreateMultipleSubDevices.set(numAvailableDevices);
-    initPlatform();
-    if (numAvailableDevices == 1) {
-        EXPECT_EQ(0u, platform()->getClDevice(0)->getNumGenericSubDevices());
-    } else {
-        EXPECT_EQ(numAvailableDevices, platform()->getClDevice(0)->getNumGenericSubDevices());
+    UltClDeviceFactory deviceFactory{1, 2};
+
+    ClDeviceVector deviceVector;
+
+    for (auto i = 0u; i < numAvailableSubDevices; i++) {
+        deviceVector.push_back(deviceFactory.subDevices[i]);
     }
-    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
-    MockContext context(platform()->getClDevice(0));
-    context.contextType = contextType;
+
+    if (deviceVector.empty()) {
+        deviceVector.push_back(deviceFactory.rootDevices[0]);
+    }
+
+    MockContext context(deviceVector);
 
     cl_int retVal = CL_SUCCESS;
     cl_image_format imageFormat = {};
@@ -294,23 +296,21 @@ XEHPTEST_P(MultiGpuGlobalAtomicsImageTest, givenAppendSurfaceStateParamCalledThe
     surfaceState.setDisableSupportForMultiGpuPartialWrites(false);
     imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), useGlobalAtomics);
 
-    bool enableGlobalAtomics = (contextType != ContextType::CONTEXT_TYPE_SPECIALIZED) && (numAvailableDevices > 1);
+    bool enableGlobalAtomics = numAvailableSubDevices != 1u;
     if (enableMultiGpuAtomicsOptimization) {
        enableGlobalAtomics &= useGlobalAtomics;
     }
     EXPECT_EQ(!enableGlobalAtomics, surfaceState.getDisableSupportForMultiGpuAtomics());
 }
 
-static unsigned int numAvailableDevicesForMultiGpuGlobalAtomicsImageTest[] = {1, 2};
+static unsigned int numAvailableSubDevicesForMultiGpuGlobalAtomicsImageTest[] = {0, 1, 2};
 static unsigned int memFlags[] = {CL_MEM_READ_ONLY, CL_MEM_READ_WRITE};
-static ContextType contextTypes[] = {ContextType::CONTEXT_TYPE_DEFAULT, ContextType::CONTEXT_TYPE_SPECIALIZED, ContextType::CONTEXT_TYPE_UNRESTRICTIVE};
 
 INSTANTIATE_TEST_CASE_P(MultiGpuGlobalAtomicsImageTest,
                         MultiGpuGlobalAtomicsImageTest,
                         ::testing::Combine(
-                            ::testing::ValuesIn(numAvailableDevicesForMultiGpuGlobalAtomicsImageTest),
+                            ::testing::ValuesIn(numAvailableSubDevicesForMultiGpuGlobalAtomicsImageTest),
                             ::testing::ValuesIn(memFlags),
-                            ::testing::ValuesIn(contextTypes),
                             ::testing::Bool(),
                             ::testing::Bool()));
 
@@ -257,6 +257,7 @@ struct EncodeSurfaceState {
 
     static void encodeBuffer(EncodeSurfaceStateArgs &args);
     static void encodeExtraBufferParams(EncodeSurfaceStateArgs &args);
+    static void encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args);
     static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo);
     static void appendBufferSurfaceState(EncodeSurfaceStateArgs &args);
 
@@ -478,6 +478,9 @@ template <typename Family>
 void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) {
 }
 
+template <typename Family>
+void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {}
+
 template <typename Family>
 void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
@@ -649,25 +649,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs
 
     encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
 
-    if constexpr (Family::isUsingMultiGpuProgrammingInSurfaceState) {
-        bool enablePartialWrites = args.implicitScaling;
-        bool enableMultiGpuAtomics = enablePartialWrites;
-
-        if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
-            enableMultiGpuAtomics = args.useGlobalAtomics && (enablePartialWrites || args.areMultipleSubDevicesInContext);
-        }
-
-        surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
-        surfaceState->setDisableSupportForMultiGpuPartialWrites(!enablePartialWrites);
-
-        if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
-            surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
-        }
-
-        if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
-            surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
-        }
-    }
+    encodeImplicitScalingParams(args);
 
     if (EncodeSurfaceState<Family>::isAuxModeEnabled(surfaceState, gmm)) {
         auto resourceFormat = gmm->gmmResourceInfo->getResourceFormat();
@@ -31,6 +31,28 @@ template <>
 inline void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) {
 }
 
+template <>
+void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {
+    auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
+    bool enablePartialWrites = args.implicitScaling;
+    bool enableMultiGpuAtomics = enablePartialWrites;
+
+    if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
+        enableMultiGpuAtomics = args.useGlobalAtomics && (enablePartialWrites || args.areMultipleSubDevicesInContext);
+    }
+
+    surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
+    surfaceState->setDisableSupportForMultiGpuPartialWrites(!enablePartialWrites);
+
+    if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
+        surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
+    }
+
+    if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
+        surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
+    }
+}
+
 template <>
 void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
 }
@@ -31,7 +31,6 @@ struct XeHpCore {
     static constexpr bool isUsingMediaSamplerDopClockGate = true;
     static constexpr bool supportsSampler = true;
     static constexpr bool isUsingGenericMediaStateClear = true;
-    static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = true;
 
     struct DataPortBindlessSurfaceExtendedMessageDescriptor {
         union {
@@ -37,7 +37,6 @@ struct XE_HPC_CORE {
     static constexpr bool isUsingMediaSamplerDopClockGate = false;
     static constexpr bool supportsSampler = false;
     static constexpr bool isUsingGenericMediaStateClear = true;
-    static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = false;
 
     static bool isXlA0(const HardwareInfo &hwInfo) {
         auto revId = hwInfo.platform.usRevId & pvcSteppingBits;
@@ -31,7 +31,6 @@ struct XE_HPG_CORE {
     static constexpr bool isUsingMediaSamplerDopClockGate = false;
     static constexpr bool supportsSampler = true;
    static constexpr bool isUsingGenericMediaStateClear = true;
-    static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = false;
 
     struct DataPortBindlessSurfaceExtendedMessageDescriptor {
         union {