diff --git a/opencl/source/xe_hp_core/image_xe_hp_core.cpp b/opencl/source/xe_hp_core/image_xe_hp_core.cpp
index bd0a3c356f..a76651aa7f 100644
--- a/opencl/source/xe_hp_core/image_xe_hp_core.cpp
+++ b/opencl/source/xe_hp_core/image_xe_hp_core.cpp
@@ -17,25 +17,12 @@ static auto gfxCore = IGFX_XE_HP_CORE;
 template <>
 void ImageHw<Family>::appendSurfaceStateParams(Family::RENDER_SURFACE_STATE *surfaceState, uint32_t rootDeviceIndex, bool useGlobalAtomics) {
-    auto imageCtxType = this->context->peekContextType();
-
-    bool enableMultiGpuPartialWrites = (imageCtxType != ContextType::CONTEXT_TYPE_SPECIALIZED) && (context->containsMultipleSubDevices(rootDeviceIndex));
-
-    bool enableMultiGpuAtomics = enableMultiGpuPartialWrites;
-
-    if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
-        enableMultiGpuAtomics &= useGlobalAtomics;
-    }
-
-    surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
-    surfaceState->setDisableSupportForMultiGpuPartialWrites(!enableMultiGpuPartialWrites);
-
-    if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
-        surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
-    }
-    if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
-        surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
-    }
+    EncodeSurfaceStateArgs args{};
+    args.outMemory = surfaceState;
+    args.useGlobalAtomics = useGlobalAtomics;
+    args.areMultipleSubDevicesInContext = context->containsMultipleSubDevices(rootDeviceIndex);
+    args.implicitScaling = args.areMultipleSubDevicesInContext;
+    EncodeSurfaceState<Family>::encodeImplicitScalingParams(args);
 }
 
 } // namespace NEO
 
 #include "opencl/source/mem_obj/image_tgllp_and_later.inl"
diff --git a/opencl/test/unit_test/xe_hp_core/xehp/test_image_xe_hp_sdv.inl b/opencl/test/unit_test/xe_hp_core/xehp/test_image_xe_hp_sdv.inl
index 4de4a3e700..a27ab26467 100644
--- a/opencl/test/unit_test/xe_hp_core/xehp/test_image_xe_hp_sdv.inl
+++ b/opencl/test/unit_test/xe_hp_core/xehp/test_image_xe_hp_sdv.inl
@@ -18,7 +18,6 @@
 #include "opencl/test/unit_test/mocks/mock_platform.h"
 
 using XeHpSdvImageTests = ::testing::Test;
-using isXePlatform = IsWithinGfxCore;
 
 XEHPTEST_F(XeHpSdvImageTests, givenContextTypeDefaultWhenImageIsWritableAndOnlyOneTileIsAvailableThenRemainFlagsToTrue) {
     DebugManagerStateRestore restorer;
@@ -247,27 +246,30 @@ XEHPTEST_F(XeHpSdvImageTests, givenContextTypeSpecializedWhenImageIsWritableThen
 }
 
 struct MultiGpuGlobalAtomicsImageTest : public XeHpSdvImageTests,
-                                        public ::testing::WithParamInterface<std::tuple<unsigned int, unsigned int, ContextType, bool, bool>> {
+                                        public ::testing::WithParamInterface<std::tuple<unsigned int, unsigned int, bool, bool>> {
 };
 
 XEHPTEST_P(MultiGpuGlobalAtomicsImageTest, givenAppendSurfaceStateParamCalledThenDisableSupportForMultiGpuAtomicsIsSetCorrectly) {
-    unsigned int numAvailableDevices, memFlags;
-    ContextType contextType;
+    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
+    unsigned int numAvailableSubDevices, memFlags;
     bool useGlobalAtomics, enableMultiGpuAtomicsOptimization;
-    std::tie(numAvailableDevices, memFlags, contextType, useGlobalAtomics, enableMultiGpuAtomicsOptimization) = GetParam();
+    std::tie(numAvailableSubDevices, memFlags, useGlobalAtomics, enableMultiGpuAtomicsOptimization) = GetParam();
 
     DebugManagerStateRestore restorer;
     DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization);
-    DebugManager.flags.CreateMultipleSubDevices.set(numAvailableDevices);
-    initPlatform();
-    if (numAvailableDevices == 1) {
-        EXPECT_EQ(0u, platform()->getClDevice(0)->getNumGenericSubDevices());
-    } else {
-        EXPECT_EQ(numAvailableDevices, platform()->getClDevice(0)->getNumGenericSubDevices());
+
+    UltClDeviceFactory deviceFactory{1, 2};
+
+    ClDeviceVector deviceVector;
+
+    for (auto i = 0u; i < numAvailableSubDevices; i++) {
+        deviceVector.push_back(deviceFactory.subDevices[i]);
     }
-    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
-    MockContext context(platform()->getClDevice(0));
-    context.contextType = contextType;
+    if (deviceVector.empty()) {
+        deviceVector.push_back(deviceFactory.rootDevices[0]);
+    }
+
+    MockContext context(deviceVector);
 
     cl_int retVal = CL_SUCCESS;
     cl_image_format imageFormat = {};
@@ -294,23 +296,21 @@ XEHPTEST_P(MultiGpuGlobalAtomicsImageTest, givenAppendSurfaceStateParamCalledThe
     surfaceState.setDisableSupportForMultiGpuPartialWrites(false);
     imageHw->appendSurfaceStateParams(&surfaceState, context.getDevice(0)->getRootDeviceIndex(), useGlobalAtomics);
 
-    bool enableGlobalAtomics = (contextType != ContextType::CONTEXT_TYPE_SPECIALIZED) && (numAvailableDevices > 1);
+    bool enableGlobalAtomics = numAvailableSubDevices != 1u;
     if (enableMultiGpuAtomicsOptimization) {
         enableGlobalAtomics &= useGlobalAtomics;
     }
     EXPECT_EQ(!enableGlobalAtomics, surfaceState.getDisableSupportForMultiGpuAtomics());
 }
 
-static unsigned int numAvailableDevicesForMultiGpuGlobalAtomicsImageTest[] = {1, 2};
+static unsigned int numAvailableSubDevicesForMultiGpuGlobalAtomicsImageTest[] = {0, 1, 2};
 static unsigned int memFlags[] = {CL_MEM_READ_ONLY, CL_MEM_READ_WRITE};
-static ContextType contextTypes[] = {ContextType::CONTEXT_TYPE_DEFAULT, ContextType::CONTEXT_TYPE_SPECIALIZED, ContextType::CONTEXT_TYPE_UNRESTRICTIVE};
 
 INSTANTIATE_TEST_CASE_P(MultiGpuGlobalAtomicsImageTest,
                         MultiGpuGlobalAtomicsImageTest,
                         ::testing::Combine(
-                            ::testing::ValuesIn(numAvailableDevicesForMultiGpuGlobalAtomicsImageTest),
+                            ::testing::ValuesIn(numAvailableSubDevicesForMultiGpuGlobalAtomicsImageTest),
                             ::testing::ValuesIn(memFlags),
-                            ::testing::ValuesIn(contextTypes),
                             ::testing::Bool(),
                             ::testing::Bool()));
diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h
index fad8b8b2e4..a623eb539d 100644
--- a/shared/source/command_container/command_encoder.h
+++ b/shared/source/command_container/command_encoder.h
@@ -257,6 +257,7 @@
     static void encodeBuffer(EncodeSurfaceStateArgs &args);
     static void encodeExtraBufferParams(EncodeSurfaceStateArgs &args);
+    static void encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args);
     static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo);
 
     static void appendBufferSurfaceState(EncodeSurfaceStateArgs &args);
diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl
index 8ba7c9e8e1..ce4fe77dd9 100644
--- a/shared/source/command_container/command_encoder.inl
+++ b/shared/source/command_container/command_encoder.inl
@@ -478,6 +478,9 @@
 template <typename Family>
 void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE *surfaceState) {
 }
 
+template <typename Family>
+void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {}
+
 template <typename Family>
 void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl
index f047d497b4..cdae17c5e0 100644
--- a/shared/source/command_container/command_encoder_xehp_and_later.inl
+++ b/shared/source/command_container/command_encoder_xehp_and_later.inl
@@ -649,25 +649,7 @@ void EncodeSurfaceState<Family>::encodeExtraBufferParams(EncodeSurfaceStateArgs
     encodeExtraCacheSettings(surfaceState, *args.gmmHelper->getHardwareInfo());
 
-    if constexpr (Family::isUsingMultiGpuProgrammingInSurfaceState) {
-        bool enablePartialWrites = args.implicitScaling;
-        bool enableMultiGpuAtomics = enablePartialWrites;
-
-        if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
-            enableMultiGpuAtomics = args.useGlobalAtomics && (enablePartialWrites || args.areMultipleSubDevicesInContext);
-        }
-
-        surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
-        surfaceState->setDisableSupportForMultiGpuPartialWrites(!enablePartialWrites);
-
-        if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
-            surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
-        }
-
-        if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
-            surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
-        }
-    }
+    encodeImplicitScalingParams(args);
 
     if (EncodeSurfaceState<Family>::isAuxModeEnabled(surfaceState, gmm)) {
         auto resourceFormat = gmm->gmmResourceInfo->getResourceFormat();
diff --git a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp
index 7e10b92352..af9cc95d74 100644
--- a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp
+++ b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp
@@ -31,6 +31,28 @@ template <>
 inline void EncodeSurfaceState<Family>::encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo) {
 }
 
+template <>
+void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {
+    auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(args.outMemory);
+    bool enablePartialWrites = args.implicitScaling;
+    bool enableMultiGpuAtomics = enablePartialWrites;
+
+    if (DebugManager.flags.EnableMultiGpuAtomicsOptimization.get()) {
+        enableMultiGpuAtomics = args.useGlobalAtomics && (enablePartialWrites || args.areMultipleSubDevicesInContext);
+    }
+
+    surfaceState->setDisableSupportForMultiGpuAtomics(!enableMultiGpuAtomics);
+    surfaceState->setDisableSupportForMultiGpuPartialWrites(!enablePartialWrites);
+
+    if (DebugManager.flags.ForceMultiGpuAtomics.get() != -1) {
+        surfaceState->setDisableSupportForMultiGpuAtomics(!!DebugManager.flags.ForceMultiGpuAtomics.get());
+    }
+
+    if (DebugManager.flags.ForceMultiGpuPartialWrites.get() != -1) {
+        surfaceState->setDisableSupportForMultiGpuPartialWrites(!!DebugManager.flags.ForceMultiGpuPartialWrites.get());
+    }
+}
+
 template <>
 void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {
 }
diff --git a/shared/source/xe_hp_core/hw_cmds_base.h b/shared/source/xe_hp_core/hw_cmds_base.h
index 21215f68f2..b1df4ed3be 100644
--- a/shared/source/xe_hp_core/hw_cmds_base.h
+++ b/shared/source/xe_hp_core/hw_cmds_base.h
@@ -31,7 +31,6 @@ struct XeHpCore {
     static constexpr bool isUsingMediaSamplerDopClockGate = true;
     static constexpr bool supportsSampler = true;
     static constexpr bool isUsingGenericMediaStateClear = true;
-    static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = true;
 
     struct DataPortBindlessSurfaceExtendedMessageDescriptor {
         union {
diff --git a/shared/source/xe_hpc_core/hw_cmds_base.h b/shared/source/xe_hpc_core/hw_cmds_base.h
index 651c584b2a..17b828c193 100644
--- a/shared/source/xe_hpc_core/hw_cmds_base.h
+++ b/shared/source/xe_hpc_core/hw_cmds_base.h
@@ -37,7 +37,6 @@ struct XE_HPC_CORE {
    static constexpr bool isUsingMediaSamplerDopClockGate = false;
    static constexpr bool supportsSampler = false;
    static constexpr bool isUsingGenericMediaStateClear = true;
-   static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = false;

    static bool isXlA0(const HardwareInfo &hwInfo) {
        auto revId = hwInfo.platform.usRevId & pvcSteppingBits;
diff --git a/shared/source/xe_hpg_core/hw_cmds_base.h b/shared/source/xe_hpg_core/hw_cmds_base.h
index 36edfbd9ac..950c115804 100644
--- a/shared/source/xe_hpg_core/hw_cmds_base.h
+++ b/shared/source/xe_hpg_core/hw_cmds_base.h
@@ -31,7 +31,6 @@ struct XE_HPG_CORE {
    static constexpr bool isUsingMediaSamplerDopClockGate = false;
    static constexpr bool supportsSampler = true;
    static constexpr bool isUsingGenericMediaStateClear = true;
-   static constexpr bool isUsingMultiGpuProgrammingInSurfaceState = false;

    struct DataPortBindlessSurfaceExtendedMessageDescriptor {
        union {
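
Reviewer note, not part of the patch: the change replaces the per-image logic in ImageHw and the if constexpr (Family::isUsingMultiGpuProgrammingInSurfaceState) branch with a single per-family hook, EncodeSurfaceState<Family>::encodeImplicitScalingParams, which is a no-op in the generic command_encoder.inl and only programs the multi-GPU surface-state fields in the XE_HP_CORE specialization. A minimal sketch of how a surface-state producer is expected to drive the new hook follows; the free-function name, FamilyType parameter, and bool arguments are illustrative only, while the EncodeSurfaceStateArgs fields and the hook itself are taken from the patch above.

// Illustrative sketch: populate EncodeSurfaceStateArgs the same way the reworked
// ImageHw<Family>::appendSurfaceStateParams does, then let the per-family
// specialization decide whether the multi-GPU bits are programmed at all.
template <typename FamilyType>
void programMultiGpuSurfaceStateFields(typename FamilyType::RENDER_SURFACE_STATE *surfaceState,
                                       bool useGlobalAtomics,
                                       bool multipleSubDevicesInContext) {
    NEO::EncodeSurfaceStateArgs args{};
    args.outMemory = surfaceState;                                      // surface state to be patched
    args.useGlobalAtomics = useGlobalAtomics;                           // kernel requests global atomics
    args.areMultipleSubDevicesInContext = multipleSubDevicesInContext;  // context spans multiple sub-devices
    args.implicitScaling = args.areMultipleSubDevicesInContext;         // images tie implicit scaling to the sub-device count
    // No-op on cores whose RENDER_SURFACE_STATE has no multi-GPU fields (generic command_encoder.inl);
    // sets DisableSupportForMultiGpuAtomics / DisableSupportForMultiGpuPartialWrites on XE_HP_CORE.
    NEO::EncodeSurfaceState<FamilyType>::encodeImplicitScalingParams(args);
}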