From 606a900080cbbb0932de0117ed175b9b5bbe90ce Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Fri, 3 Feb 2023 02:11:42 +0100 Subject: [PATCH] Revert "Disable EUFusion for odd work groups with DPAS on DG2" This reverts commit 017d66a46991c50b962ee5f5ae39947845c36a3d. Signed-off-by: Compute-Runtime-Validation --- level_zero/core/source/cmdlist/cmdlist_hw.h | 2 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 12 +- .../core/source/cmdlist/cmdlist_hw_base.inl | 2 +- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 2 +- level_zero/core/source/kernel/kernel_imp.cpp | 13 +- .../sources/cmdlist/test_cmdlist_7.cpp | 15 +- .../test_cmdlist_append_launch_kernel_2.cpp | 7 +- .../test_cmdlist_append_launch_kernel_3.cpp | 5 +- .../cmdlist/test_cmdlist_xehp_and_later.cpp | 32 ++-- .../xe_hpg_core/dg2/test_cmdlist_dg2.cpp | 36 ---- .../xe_hpg_core/dg2/test_kernel_dg2.cpp | 110 ----------- .../xe_hpg_core/test_cmdlist_xe_hpg_core.cpp | 11 +- .../command_queue/cl_local_work_size.cpp | 6 +- opencl/source/command_queue/enqueue_common.h | 3 +- opencl/source/kernel/kernel.cpp | 4 +- .../test/unit_test/xe_hpg_core/CMakeLists.txt | 1 - .../unit_test/xe_hpg_core/dg2/CMakeLists.txt | 1 - .../dg2/command_queue_tests_dg2.cpp | 178 ------------------ .../dg2/test_cmds_programming_dg2.cpp | 1 - .../xe_hpg_core/local_work_size_tests_dg2.cpp | 68 ------- .../source/helpers/gfx_core_helper_base.inl | 1 + shared/source/os_interface/hw_info_config.h | 2 - shared/source/os_interface/hw_info_config.inl | 4 - .../dg2/os_agnostic_hw_info_config_dg2.inl | 16 -- .../windows/hw_info_config_dg2.cpp | 1 + .../helpers/gfx_core_helper_tests.cpp | 4 - .../dg2/excludes_xe_hpg_core_dg2.cpp | 1 - .../dg2/product_config_helper_tests_dg2.cpp | 55 ------ 28 files changed, 45 insertions(+), 548 deletions(-) delete mode 100644 opencl/test/unit_test/xe_hpg_core/dg2/command_queue_tests_dg2.cpp delete mode 100644 opencl/test/unit_test/xe_hpg_core/local_work_size_tests_dg2.cpp diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 27150d434d..35eaaeff63 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -258,7 +258,7 @@ struct CommandListCoreFamily : CommandListImp { const CmdListKernelLaunchParams &launchParams); ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions); - void updateStreamProperties(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions); + void updateStreamProperties(Kernel &kernel, bool isCooperative); void updateStateBaseAddressStreamProperties(Kernel &kernel, bool updateRequiredState, bool captureBaseAddressState); void clearCommandsToPatch(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index bb39757d43..2c5647f2b1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2371,21 +2371,15 @@ void CommandListCoreFamily::updateStateBaseAddressStreamPropertie } template -void CommandListCoreFamily::updateStreamProperties(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions) { +void CommandListCoreFamily::updateStreamProperties(Kernel &kernel, bool isCooperative) { using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment(); auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes; bool captureBaseAddressState = containsAnyKernel; - bool fusedEuDisabled = kernelAttributes.flags.requiresDisabledEUFusion; - auto &productHelper = device->getProductHelper(); - if (threadGroupDimensions) { - uint32_t groupCount[3] = {threadGroupDimensions->groupCountX, threadGroupDimensions->groupCountY, threadGroupDimensions->groupCountZ}; - fusedEuDisabled |= productHelper.isFusedEuDisabledForDpas(kernelAttributes.flags.usesSystolicPipelineSelectMode, kernel.getGroupSize(), groupCount); - } if (!containsAnyKernel) { - requiredStreamState.frontEndState.setProperties(isCooperative, fusedEuDisabled, true, -1, rootDeviceEnvironment); + requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, rootDeviceEnvironment); requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment); if (!this->isFlushTaskSubmissionEnabled) { @@ -2416,7 +2410,7 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel rootDeviceEnvironment); } - finalStreamState.frontEndState.setProperties(isCooperative, fusedEuDisabled, true, -1, rootDeviceEnvironment); + finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, rootDeviceEnvironment); bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get(); if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) { if (isPatchingVfeStateAllowed) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 49bb84041e..52ba9dd4d2 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -138,7 +138,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K std::list additionalCommands; - updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions); + updateStreamProperties(*kernel, launchParams.isCooperative); NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ 0, // eventAddress neoDevice, // device diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 1968263702..20398bcd3d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -254,7 +254,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); - updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions); + updateStreamProperties(*kernel, launchParams.isCooperative); auto localMemSize = static_cast(neoDevice->getDeviceInfo().localMemSize); auto slmTotalSize = kernelImp->getSlmTotalSize(); diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 087239766b..717e97767e 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -368,12 +368,9 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } - bool requiresEuFusionDisable = kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion || - neoDevice->getProductHelper().isFusedEuDisabledForDpas(kernelImmData->getDescriptor().kernelAttributes.flags.usesSystolicPipelineSelectMode, nullptr, nullptr); - NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(), neoDevice->getRootDeviceEnvironment(), numThreadsPerSubSlice, localMemSize, - usesImages, false, requiresEuFusionDisable); + usesImages, false, kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion); NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim); } else { if (1U == dim) { @@ -384,6 +381,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz NEO::computeWorkgroupSize2D(maxWorkGroupSize, retGroupSize, workItems, simd); } } + *groupSizeX = static_cast(retGroupSize[0]); *groupSizeY = static_cast(retGroupSize[1]); *groupSizeZ = static_cast(retGroupSize[2]); @@ -749,12 +747,7 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) preferredGroupSizeProperties->preferredMultiple = this->kernelImmData->getKernelInfo()->getMaxSimdSize(); auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper(); - auto &productHelper = this->module->getDevice()->getProductHelper(); - - bool requiresEuFusionDisabled = kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion || - productHelper.isFusedEuDisabledForDpas(kernelImmData->getDescriptor().kernelAttributes.flags.usesSystolicPipelineSelectMode, nullptr, nullptr); - - if (gfxCoreHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), requiresEuFusionDisabled)) { + if (gfxCoreHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) { preferredGroupSizeProperties->preferredMultiple *= 2; } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 34cdf374d0..36af1cf36c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -340,8 +340,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStr ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; - const ze_group_count_t launchKernelArgs = {}; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); if (commandList->stateComputeModeTracking) { EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); @@ -351,7 +350,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStr } const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } @@ -378,8 +377,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; - const ze_group_count_t launchKernelArgs = {}; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); if (commandList->stateComputeModeTracking) { EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); if (productHelper.isGrfNumReportedWithScm()) { @@ -393,7 +391,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, } const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } @@ -411,8 +409,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertes ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; - const ze_group_count_t launchKernelArgs = {}; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); if (commandList->stateComputeModeTracking) { EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); @@ -421,7 +418,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertes EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); } - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index 62faee53dc..3171670cee 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -1374,19 +1374,18 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp ASSERT_EQ(ZE_RESULT_SUCCESS, result); // initial kernel with no policy preference - const ze_group_count_t launchKernelArgs = {}; - pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs); + pCommandList->updateStreamProperties(kernel, false); EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value); // policy changed to non-default state pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value = nonDefaultThreadArbitrationPolicy; // another kernel with no policy preference - do not update policy - pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs); + pCommandList->updateStreamProperties(kernel, false); EXPECT_EQ(nonDefaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value); // another kernel with no policy preference, this time with debug toggle set - update policy back to default value DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.set(true); - pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs); + pCommandList->updateStreamProperties(kernel, false); EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 06d6b0d017..17f76922aa 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -372,12 +372,11 @@ HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenR const auto &productHelper = device->getProductHelper(); int32_t expectedDisableOverdispatch = productHelper.isDisableOverdispatchAvailable(*defaultHwInfo) ? 1 : -1; - const ze_group_count_t launchKernelArgs = {}; - pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs); + pCommandList->updateStreamProperties(kernel, false); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); - pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs); + pCommandList->updateStreamProperties(kernel, false); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index bbc572cdad..628d91cdc7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -217,24 +217,23 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsWhenUpdateStreamProp auto &productHelper = device->getProductHelper(); int32_t expectedDispatchAllWalkerEnable = productHelper.isComputeDispatchAllWalkerEnableInCfeStateRequired(device->getHwInfo()) ? 0 : -1; - const ze_group_count_t launchKernelArgs = {}; - pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs); + pCommandList->updateStreamProperties(defaultKernel, false); EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(0u, pCommandList->commandsToPatch.size()); pCommandList->reset(); - pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs); - pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs); + pCommandList->updateStreamProperties(cooperativeKernel, true); + pCommandList->updateStreamProperties(cooperativeKernel, true); expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable; EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(0u, pCommandList->commandsToPatch.size()); pCommandList->reset(); - pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs); - pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs); + pCommandList->updateStreamProperties(defaultKernel, false); + pCommandList->updateStreamProperties(cooperativeKernel, true); expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 0 : expectedDispatchAllWalkerEnable; EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable; @@ -243,18 +242,18 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsWhenUpdateStreamProp EXPECT_EQ(expectedCommandsToPatch, pCommandList->commandsToPatch.size()); pCommandList->reset(); - pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs); - pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs); - pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs); + pCommandList->updateStreamProperties(cooperativeKernel, true); + pCommandList->updateStreamProperties(defaultKernel, false); + pCommandList->updateStreamProperties(cooperativeKernel, true); EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value); expectedCommandsToPatch = expectedCommandsToPatch != 0 ? 2 : 0; EXPECT_EQ(expectedCommandsToPatch, pCommandList->commandsToPatch.size()); pCommandList->reset(); - pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs); - pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs); - pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs); + pCommandList->updateStreamProperties(defaultKernel, false); + pCommandList->updateStreamProperties(defaultKernel, false); + pCommandList->updateStreamProperties(cooperativeKernel, true); expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 0 : expectedDispatchAllWalkerEnable; EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value); expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable; @@ -284,15 +283,14 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - const ze_group_count_t launchKernelArgs = {}; - pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs); - pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs); + pCommandList->updateStreamProperties(defaultKernel, false); + pCommandList->updateStreamProperties(cooperativeKernel, true); EXPECT_EQ(0u, pCommandList->commandsToPatch.size()); pCommandList->reset(); DebugManager.flags.AllowPatchingVfeStateInCommandLists.set(1); - pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs); - pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs); + pCommandList->updateStreamProperties(defaultKernel, false); + pCommandList->updateStreamProperties(cooperativeKernel, true); const auto &productHelper = device->getProductHelper(); size_t expectedCmdsToPatch = productHelper.isComputeDispatchAllWalkerEnableInCfeStateRequired(device->getHwInfo()) ? 1 : 0; diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_cmdlist_dg2.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_cmdlist_dg2.cpp index f04d62b1f2..fcb5dd0c2d 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_cmdlist_dg2.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_cmdlist_dg2.cpp @@ -13,7 +13,6 @@ #include "level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" -#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { @@ -62,40 +61,5 @@ HWTEST2_F(CommandListTests, givenDG2WithBSteppingWhenCreatingCommandListThenAddi EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable()); EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable()); } -HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenLwsIsOddThenFesedEuIsDisabled, IsDG2) { - Mock<::L0::Kernel> kernel; - auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); - kernel.module = pMockModule.get(); - - auto commandList = std::make_unique>>(); - auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - const_cast(&kernel.getKernelDescriptor())->kernelAttributes.flags.usesSystolicPipelineSelectMode = true; - const ze_group_count_t launchKernelArgs = {3, 1, 1}; - kernel.groupSize[0] = 7; - kernel.groupSize[1] = 1; - kernel.groupSize[2] = 1; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); - EXPECT_TRUE(commandList->finalStreamState.frontEndState.disableEUFusion.value); -} -HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenLwsIsNonOddThenFesedEuIsNotDisabled, IsDG2) { - Mock<::L0::Kernel> kernel; - auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); - kernel.module = pMockModule.get(); - - auto commandList = std::make_unique>>(); - auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - const_cast(&kernel.getKernelDescriptor())->kernelAttributes.flags.usesSystolicPipelineSelectMode = true; - const ze_group_count_t launchKernelArgs = {3, 1, 1}; - kernel.groupSize[0] = 8; - kernel.groupSize[1] = 1; - kernel.groupSize[2] = 1; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); - EXPECT_FALSE(commandList->finalStreamState.frontEndState.disableEUFusion.value); -} - } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_kernel_dg2.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_kernel_dg2.cpp index b39698edfa..1612001984 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_kernel_dg2.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/dg2/test_kernel_dg2.cpp @@ -69,115 +69,5 @@ HWTEST2_F(KernelTestDG2, givenKernelImpWhenSetBufferSurfaceStateCalledThenProgra context->freeMem(devicePtr); } -HWTEST2_F(KernelTestDG2, givenKernelImpWithDpasWhenSuggestingWGSizeThenSizeIsDifferntThanWithoutDpas, IsDG2) { - using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - ze_kernel_handle_t kernelHandle; - - ze_kernel_desc_t kernelDesc = {}; - kernelDesc.pKernelName = kernelName.c_str(); - - ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); - - ASSERT_EQ(ZE_RESULT_SUCCESS, res); - - auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); - - reinterpret_cast(module->getDevice()->getNEODevice())->deviceInfo.maxNumEUsPerSubSlice = 16; - reinterpret_cast(module->getDevice()->getNEODevice())->deviceInfo.numThreadsPerEU = 8; - uint32_t groupSizeXDpas = 79u; - uint32_t groupSizeYDpas = 14; - uint32_t groupSizeZDpas = 1u; - const_cast(kernelImp->getImmutableData()->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = true; - kernelImp->suggestGroupSize(groupSizeXDpas, groupSizeYDpas, groupSizeZDpas, &groupSizeXDpas, &groupSizeYDpas, &groupSizeZDpas); - uint32_t groupSizeXNoDpas = 79u; - uint32_t groupSizeYNoDpas = 14u; - uint32_t groupSizeZNoDpas = 1u; - const_cast(kernelImp->getImmutableData()->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = false; - kernelImp->suggestGroupSize(groupSizeXNoDpas, groupSizeYNoDpas, groupSizeZNoDpas, &groupSizeXNoDpas, &groupSizeYNoDpas, &groupSizeZNoDpas); - EXPECT_TRUE(groupSizeXDpas != groupSizeXNoDpas || groupSizeYDpas != groupSizeYNoDpas); - Kernel::fromHandle(kernelHandle)->destroy(); -} - -HWTEST2_F(KernelTestDG2, givenKernelImpWithFusedEuDisabledWhenSuggestingWGSizeThenSizeIsDifferntThanWithoutDpas, IsDG2) { - using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - ze_kernel_handle_t kernelHandle; - - ze_kernel_desc_t kernelDesc = {}; - kernelDesc.pKernelName = kernelName.c_str(); - - ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); - - ASSERT_EQ(ZE_RESULT_SUCCESS, res); - - auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); - - reinterpret_cast(module->getDevice()->getNEODevice())->deviceInfo.maxNumEUsPerSubSlice = 16; - reinterpret_cast(module->getDevice()->getNEODevice())->deviceInfo.numThreadsPerEU = 8; - uint32_t groupSizeXEuFusionDisabled = 79u; - uint32_t groupSizeYEuFusionDisabled = 14; - uint32_t groupSizeZEuFusionDisabled = 1u; - const_cast(kernelImp->getImmutableData()->getDescriptor()).kernelAttributes.flags.requiresDisabledEUFusion = true; - kernelImp->suggestGroupSize(groupSizeXEuFusionDisabled, groupSizeYEuFusionDisabled, groupSizeZEuFusionDisabled, &groupSizeXEuFusionDisabled, &groupSizeYEuFusionDisabled, &groupSizeZEuFusionDisabled); - uint32_t groupSizeXNoEuFusionDisabled = 79u; - uint32_t groupSizeYNoEuFusionDisabled = 14; - uint32_t groupSizeZNoEuFusionDisabled = 1u; - const_cast(kernelImp->getImmutableData()->getDescriptor()).kernelAttributes.flags.requiresDisabledEUFusion = false; - kernelImp->suggestGroupSize(groupSizeXNoEuFusionDisabled, groupSizeYNoEuFusionDisabled, groupSizeZNoEuFusionDisabled, &groupSizeXNoEuFusionDisabled, &groupSizeYNoEuFusionDisabled, &groupSizeZNoEuFusionDisabled); - EXPECT_TRUE(groupSizeXEuFusionDisabled != groupSizeXNoEuFusionDisabled || groupSizeYEuFusionDisabled != groupSizeYNoEuFusionDisabled); - Kernel::fromHandle(kernelHandle)->destroy(); -} - -HWTEST2_F(KernelTestDG2, givenKernelImpWithDpasWhenGetPreferredWorkGroupSizeThenReturnedSizeIsLowerThanSizeForKernelWithoutDpas, IsDG2) { - using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - ze_kernel_handle_t kernelHandle; - - ze_kernel_desc_t kernelDesc = {}; - kernelDesc.pKernelName = kernelName.c_str(); - - ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); - - ASSERT_EQ(ZE_RESULT_SUCCESS, res); - - auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); - ze_kernel_properties_t properties = {}; - ze_kernel_preferred_group_size_properties_t extProperties = {}; - extProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES; - properties.pNext = &extProperties; - const_cast(kernelImp->getImmutableData()->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = true; - kernelImp->getProperties(&properties); - auto sizeWithDpas = extProperties.preferredMultiple; - const_cast(kernelImp->getImmutableData()->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = false; - kernelImp->getProperties(&properties); - auto sizeWithoutDpas = extProperties.preferredMultiple; - EXPECT_NE(sizeWithDpas, sizeWithoutDpas); - Kernel::fromHandle(kernelHandle)->destroy(); -} - -HWTEST2_F(KernelTestDG2, givenKernelImpWithFusedEuDisabledWhenGetPreferredWorkGroupSizeThenReturnedSizeIsLowerThanSizeForKernelWithoutFusedEuEnabled, IsDG2) { - using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; - ze_kernel_handle_t kernelHandle; - - ze_kernel_desc_t kernelDesc = {}; - kernelDesc.pKernelName = kernelName.c_str(); - - ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle); - - ASSERT_EQ(ZE_RESULT_SUCCESS, res); - - auto kernelImp = reinterpret_cast(L0::Kernel::fromHandle(kernelHandle)); - ze_kernel_properties_t properties = {}; - ze_kernel_preferred_group_size_properties_t extProperties = {}; - extProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES; - properties.pNext = &extProperties; - const_cast(kernelImp->getImmutableData()->getDescriptor()).kernelAttributes.flags.requiresDisabledEUFusion = true; - kernelImp->getProperties(&properties); - auto sizeWithDpas = extProperties.preferredMultiple; - const_cast(kernelImp->getImmutableData()->getDescriptor()).kernelAttributes.flags.requiresDisabledEUFusion = false; - kernelImp->getProperties(&properties); - auto sizeWithoutDpas = extProperties.preferredMultiple; - EXPECT_NE(sizeWithDpas, sizeWithoutDpas); - Kernel::fromHandle(kernelHandle)->destroy(); -} - } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp index 9357cdd5b3..d928aad942 100644 --- a/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpg_core/test_cmdlist_xe_hpg_core.cpp @@ -348,8 +348,7 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; - const ze_group_count_t launchKernelArgs = {}; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); if (commandList->stateComputeModeTracking) { EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); if (productHelper.isGrfNumReportedWithScm()) { @@ -362,7 +361,7 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); } const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } @@ -386,8 +385,7 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); ASSERT_EQ(ZE_RESULT_SUCCESS, result); const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; - const ze_group_count_t launchKernelArgs = {}; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); if (commandList->stateComputeModeTracking) { EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); @@ -395,8 +393,9 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); } + const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; - commandList->updateStreamProperties(kernel, false, &launchKernelArgs); + commandList->updateStreamProperties(kernel, false); EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); } diff --git a/opencl/source/command_queue/cl_local_work_size.cpp b/opencl/source/command_queue/cl_local_work_size.cpp index e6ca56851d..c92d6a5764 100644 --- a/opencl/source/command_queue/cl_local_work_size.cpp +++ b/opencl/source/command_queue/cl_local_work_size.cpp @@ -97,9 +97,6 @@ WorkSizeInfo createWorkSizeInfoFromDispatchInfo(const DispatchInfo &dispatchInfo const auto &kernelInfo = dispatchInfo.getKernel()->getKernelInfo(); auto numThreadsPerSubSlice = static_cast(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) * device.getSharedDeviceInfo().numThreadsPerEU; - bool requiresEuFusionDisabled = kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion || - device.getProductHelper().isFusedEuDisabledForDpas(kernelInfo.kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode, nullptr, nullptr); - WorkSizeInfo wsInfo(dispatchInfo.getKernel()->getMaxKernelWorkGroupSize(), kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers(), static_cast(kernelInfo.getMaxSimdSize()), @@ -109,8 +106,7 @@ WorkSizeInfo createWorkSizeInfoFromDispatchInfo(const DispatchInfo &dispatchInfo static_cast(device.getSharedDeviceInfo().localMemSize), false, false, - requiresEuFusionDisabled); - + kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion); wsInfo.setIfUseImg(kernelInfo); return wsInfo; diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 8b2cb0ddbd..d6bc71c74b 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -855,8 +855,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode; - dispatchFlags.disableEUFusion = kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion || - device->getProductHelper().isFusedEuDisabledForDpas(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode, *kernel->getLocalWorkSizeValues().data(), *kernel->getNumWorkGroupsValues().data()); + dispatchFlags.disableEUFusion = kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion; const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 66a478bd1c..3865dca370 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -567,7 +567,6 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, size_t maxWorkgroupSize; const auto &hwInfo = clDevice.getHardwareInfo(); auto &gfxCoreHelper = clDevice.getGfxCoreHelper(); - auto &productHelper = clDevice.getProductHelper(); auto &clGfxCoreHelper = clDevice.getRootDeviceEnvironment().getHelper(); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); @@ -598,8 +597,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: preferredWorkGroupSizeMultiple = kernelInfo.getMaxSimdSize(); - if (gfxCoreHelper.isFusedEuDispatchEnabled(hwInfo, kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion) && - !productHelper.isFusedEuDisabledForDpas(kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode, nullptr, nullptr)) { + if (gfxCoreHelper.isFusedEuDispatchEnabled(hwInfo, kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) { preferredWorkGroupSizeMultiple *= 2; } srcSize = sizeof(preferredWorkGroupSizeMultiple); diff --git a/opencl/test/unit_test/xe_hpg_core/CMakeLists.txt b/opencl/test/unit_test/xe_hpg_core/CMakeLists.txt index b9b69a1ff3..451bcaebe1 100644 --- a/opencl/test/unit_test/xe_hpg_core/CMakeLists.txt +++ b/opencl/test/unit_test/xe_hpg_core/CMakeLists.txt @@ -17,7 +17,6 @@ if(TESTS_XE_HPG_CORE) ${CMAKE_CURRENT_SOURCE_DIR}/command_stream_receiver_hw_tests_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_tests_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_tests_xe_hpg_core.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/local_work_size_tests_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cl_device_caps_xe_hpg_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_xe_hpg_core.cpp ) diff --git a/opencl/test/unit_test/xe_hpg_core/dg2/CMakeLists.txt b/opencl/test/unit_test/xe_hpg_core/dg2/CMakeLists.txt index 9ba13c45af..c2bc7b0d61 100644 --- a/opencl/test/unit_test/xe_hpg_core/dg2/CMakeLists.txt +++ b/opencl/test/unit_test/xe_hpg_core/dg2/CMakeLists.txt @@ -13,7 +13,6 @@ if(TESTS_DG2) set(IGDRCL_SRCS_tests_xe_hpg_core_dg2 ${IGDRCL_SRCS_tests_xe_hpg_core_dg2_excludes} ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt - ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_tests_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/buffer_pool_alloc_tests_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/get_device_info_dg2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_cmds_programming_dg2.cpp diff --git a/opencl/test/unit_test/xe_hpg_core/dg2/command_queue_tests_dg2.cpp b/opencl/test/unit_test/xe_hpg_core/dg2/command_queue_tests_dg2.cpp deleted file mode 100644 index c8e4bbe3e2..0000000000 --- a/opencl/test/unit_test/xe_hpg_core/dg2/command_queue_tests_dg2.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (C) 2023 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/command_stream/scratch_space_controller_base.h" -#include "shared/source/os_interface/hw_info_config.h" -#include "shared/source/xe_hpg_core/hw_cmds_dg2.h" -#include "shared/test/common/helpers/engine_descriptor_helper.h" -#include "shared/test/common/mocks/mock_command_stream_receiver.h" -#include "shared/test/common/test_macros/header/per_product_test_definitions.h" -#include "shared/test/common/test_macros/test.h" - -#include "opencl/source/event/event_builder.h" -#include "opencl/source/helpers/task_information.h" -#include "opencl/test/unit_test/command_queue/command_queue_fixture.h" -#include "opencl/test/unit_test/mocks/mock_command_queue.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" -#include "opencl/test/unit_test/mocks/mock_mdi.h" - -using namespace NEO; - -class MyMockCommandStreamReceiver : public MockCommandStreamReceiver { - public: - using CommandStreamReceiver::scratchSpaceController; - MyMockCommandStreamReceiver(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) - : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {} - CompletionStamp flushTask( - LinearStream &commandStream, - size_t commandStreamStart, - const IndirectHeap *dsh, - const IndirectHeap *ioh, - const IndirectHeap *ssh, - TaskCountType taskLevel, - DispatchFlags &dispatchFlags, - Device &device) override { - disableEuFusionPassed = dispatchFlags.disableEUFusion; - return MockCommandStreamReceiver::flushTask(commandStream, commandStreamStart, dsh, ioh, ssh, taskLevel, dispatchFlags, device); - } - bool disableEuFusionPassed = false; -}; -template -class MockCmdQueueOverrideCsr : public MockCommandQueueHw { - public: - MockCmdQueueOverrideCsr(Context *context, - ClDevice *device, - MyMockCommandStreamReceiver *csr) : MockCommandQueueHw(context, device, nullptr) { - this->csr = csr; - } - CommandStreamReceiver &getGpgpuCommandStreamReceiver() const override { return *csr; } - MyMockCommandStreamReceiver *csr = nullptr; -}; - -DG2TEST_F(CommandQueueHwTest, GivenKernelWithDpasAndOddWorkGroupWhenenqueueNonBlockedCalledThenDisableEuFusionPassedToFlushTask) { - auto hardwareInfo = *defaultHwInfo; - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); - std::unique_ptr osContext(OsContext::create(mockDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), mockDevice->getRootDeviceIndex(), 0, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular}, - PreemptionMode::ThreadGroup, mockDevice->getDeviceBitfield()))); - auto csr = std::make_unique(*mockDevice->getExecutionEnvironment(), mockDevice->getRootDeviceIndex(), mockDevice->getDeviceBitfield()); - csr->setupContext(*osContext); - auto scratchController = new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *csr->getInternalAllocationStorage()); - csr->scratchSpaceController.reset(scratchController); - MockCmdQueueOverrideCsr cmdQ(pContext, mockDevice.get(), csr.get()); - MockKernelWithInternals mockKernelWithInternals(*mockDevice.get()); - auto pKernel = mockKernelWithInternals.mockKernel; - MockMultiDispatchInfo multiDispatchInfo(mockDevice.get(), pKernel); - BlitPropertiesContainer blitPropertiesContainer; - const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitPropertiesContainer); - TimestampPacketDependencies timestampPacketDependencies; - EventsRequest eventsRequest(0, nullptr, nullptr); - EventBuilder eventBuilder; - LinearStream commandStream; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[0] = 0; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[1] = 4; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[2] = 8; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[0] = 12; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[1] = 16; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[2] = 20; - - pKernel->setLocalWorkSizeValues(3, 7, 1); - pKernel->setNumWorkGroupsValues(5, 1, 1); - - bool blocking = false; - const_cast(pKernel->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = true; - cmdQ.template enqueueNonBlocked(nullptr, 0, commandStream, commandStream.getUsed(), blocking, true, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, nullptr); - EXPECT_TRUE(csr->disableEuFusionPassed); -} - -DG2TEST_F(CommandQueueHwTest, GivenKernelWithDpasAndNotOddWorkGroupWhenenqueueNonBlockedCalledThenDisableEuFusionNotPassedToFlushTask) { - auto hardwareInfo = *defaultHwInfo; - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); - std::unique_ptr osContext(OsContext::create(mockDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), mockDevice->getRootDeviceIndex(), 0, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular}, - PreemptionMode::ThreadGroup, mockDevice->getDeviceBitfield()))); - auto csr = std::make_unique(*mockDevice->getExecutionEnvironment(), mockDevice->getRootDeviceIndex(), mockDevice->getDeviceBitfield()); - csr->setupContext(*osContext); - auto scratchController = new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *csr->getInternalAllocationStorage()); - csr->scratchSpaceController.reset(scratchController); - MockCmdQueueOverrideCsr cmdQ(pContext, mockDevice.get(), csr.get()); - MockKernelWithInternals mockKernelWithInternals(*mockDevice.get()); - auto pKernel = mockKernelWithInternals.mockKernel; - MockMultiDispatchInfo multiDispatchInfo(mockDevice.get(), pKernel); - BlitPropertiesContainer blitPropertiesContainer; - const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitPropertiesContainer); - TimestampPacketDependencies timestampPacketDependencies; - EventsRequest eventsRequest(0, nullptr, nullptr); - EventBuilder eventBuilder; - LinearStream commandStream; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[0] = 0; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[1] = 4; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.localWorkSize[2] = 8; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[0] = 12; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[1] = 16; - const_cast(pKernel->getDescriptor()).payloadMappings.dispatchTraits.numWorkGroups[2] = 20; - - pKernel->setLocalWorkSizeValues(4, 7, 1); - pKernel->setNumWorkGroupsValues(5, 1, 1); - - bool blocking = false; - const_cast(pKernel->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = true; - cmdQ.template enqueueNonBlocked(nullptr, 0, commandStream, commandStream.getUsed(), blocking, true, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, nullptr); - EXPECT_FALSE(csr->disableEuFusionPassed); -} -DG2TEST_F(CommandQueueHwTest, GivenKernelWithRequiredDisableEuFusionWhenenqueueNonBlockedCalledThenDisableEuFusionPassedToFlushTask) { - auto hardwareInfo = *defaultHwInfo; - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); - std::unique_ptr osContext(OsContext::create(mockDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), mockDevice->getRootDeviceIndex(), 0, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular}, - PreemptionMode::ThreadGroup, mockDevice->getDeviceBitfield()))); - auto csr = std::make_unique(*mockDevice->getExecutionEnvironment(), mockDevice->getRootDeviceIndex(), mockDevice->getDeviceBitfield()); - csr->setupContext(*osContext); - auto scratchController = new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *csr->getInternalAllocationStorage()); - csr->scratchSpaceController.reset(scratchController); - MockCmdQueueOverrideCsr cmdQ(pContext, mockDevice.get(), csr.get()); - MockKernelWithInternals mockKernelWithInternals(*mockDevice.get()); - auto pKernel = mockKernelWithInternals.mockKernel; - MockMultiDispatchInfo multiDispatchInfo(mockDevice.get(), pKernel); - BlitPropertiesContainer blitPropertiesContainer; - const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitPropertiesContainer); - TimestampPacketDependencies timestampPacketDependencies; - EventsRequest eventsRequest(0, nullptr, nullptr); - EventBuilder eventBuilder; - LinearStream commandStream; - - bool blocking = false; - const_cast(pKernel->getDescriptor()).kernelAttributes.flags.requiresDisabledEUFusion = true; - cmdQ.template enqueueNonBlocked(nullptr, 0, commandStream, commandStream.getUsed(), blocking, true, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, nullptr); - EXPECT_TRUE(csr->disableEuFusionPassed); -} -DG2TEST_F(CommandQueueHwTest, GivenKernelWithoutRequiredDisableEuFusionWhenenqueueNonBlockedCalledThenDisableEuFusionNotPassedToFlushTask) { - auto hardwareInfo = *defaultHwInfo; - auto mockDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hardwareInfo, 0)); - std::unique_ptr osContext(OsContext::create(mockDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), mockDevice->getRootDeviceIndex(), 0, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS1, EngineUsage::Regular}, - PreemptionMode::ThreadGroup, mockDevice->getDeviceBitfield()))); - auto csr = std::make_unique(*mockDevice->getExecutionEnvironment(), mockDevice->getRootDeviceIndex(), mockDevice->getDeviceBitfield()); - csr->setupContext(*osContext); - auto scratchController = new ScratchSpaceControllerBase(pDevice->getRootDeviceIndex(), *pDevice->executionEnvironment, *csr->getInternalAllocationStorage()); - csr->scratchSpaceController.reset(scratchController); - MockCmdQueueOverrideCsr cmdQ(pContext, mockDevice.get(), csr.get()); - MockKernelWithInternals mockKernelWithInternals(*mockDevice.get()); - auto pKernel = mockKernelWithInternals.mockKernel; - MockMultiDispatchInfo multiDispatchInfo(mockDevice.get(), pKernel); - BlitPropertiesContainer blitPropertiesContainer; - const EnqueueProperties enqueueProperties(false, true, false, false, false, &blitPropertiesContainer); - TimestampPacketDependencies timestampPacketDependencies; - EventsRequest eventsRequest(0, nullptr, nullptr); - EventBuilder eventBuilder; - LinearStream commandStream; - - bool blocking = false; - const_cast(pKernel->getDescriptor()).kernelAttributes.flags.requiresDisabledEUFusion = false; - cmdQ.template enqueueNonBlocked(nullptr, 0, commandStream, commandStream.getUsed(), blocking, true, multiDispatchInfo, enqueueProperties, timestampPacketDependencies, eventsRequest, eventBuilder, 0, nullptr); - EXPECT_FALSE(csr->disableEuFusionPassed); -} \ No newline at end of file diff --git a/opencl/test/unit_test/xe_hpg_core/dg2/test_cmds_programming_dg2.cpp b/opencl/test/unit_test/xe_hpg_core/dg2/test_cmds_programming_dg2.cpp index b9bf21d0bb..631156fca5 100644 --- a/opencl/test/unit_test/xe_hpg_core/dg2/test_cmds_programming_dg2.cpp +++ b/opencl/test/unit_test/xe_hpg_core/dg2/test_cmds_programming_dg2.cpp @@ -18,7 +18,6 @@ #include "shared/test/common/test_macros/test.h" #include "opencl/source/mem_obj/buffer.h" -#include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_context.h" diff --git a/opencl/test/unit_test/xe_hpg_core/local_work_size_tests_dg2.cpp b/opencl/test/unit_test/xe_hpg_core/local_work_size_tests_dg2.cpp deleted file mode 100644 index dc90358a91..0000000000 --- a/opencl/test/unit_test/xe_hpg_core/local_work_size_tests_dg2.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2023 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/helpers/gfx_core_helper.h" -#include "shared/source/helpers/local_work_size.h" -#include "shared/test/common/helpers/debug_manager_state_restore.h" -#include "shared/test/common/mocks/mock_device.h" -#include "shared/test/common/mocks/mock_execution_environment.h" -#include "shared/test/common/test_macros/hw_test.h" - -#include "opencl/source/command_queue/cl_local_work_size.h" -#include "opencl/source/helpers/dispatch_info.h" -#include "opencl/test/unit_test/mocks/mock_cl_device.h" -#include "opencl/test/unit_test/mocks/mock_kernel.h" - -using namespace NEO; - -using LocalWorkSizeTestDG2 = ::testing::Test; - -DG2TEST_F(LocalWorkSizeTestDG2, givenKernelWithDpasAndSlmWhenWorkSizeInfoCalculatedThenMinWGSizeIsLessThanForKernelWithoutDpas) { - MockClDevice device{new MockDevice}; - MockKernelWithInternals kernel(device); - DispatchInfo dispatchInfo; - dispatchInfo.setClDevice(&device); - dispatchInfo.setKernel(kernel.mockKernel); - - auto threadsPerEu = defaultHwInfo->gtSystemInfo.ThreadCount / defaultHwInfo->gtSystemInfo.EUCount; - auto euPerSubSlice = defaultHwInfo->gtSystemInfo.ThreadCount / defaultHwInfo->gtSystemInfo.MaxEuPerSubSlice; - - auto &deviceInfo = device.sharedDeviceInfo; - deviceInfo.maxNumEUsPerSubSlice = euPerSubSlice; - deviceInfo.numThreadsPerEU = threadsPerEu; - kernel.mockKernel->slmTotalSize = 0x100; - - const_cast(kernel.mockKernel->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = true; - WorkSizeInfo workSizeInfoWithDpas = createWorkSizeInfoFromDispatchInfo(dispatchInfo); - - const_cast(kernel.mockKernel->getDescriptor()).kernelAttributes.flags.usesSystolicPipelineSelectMode = false; - WorkSizeInfo workSizeInfoWithoutDpas = createWorkSizeInfoFromDispatchInfo(dispatchInfo); - EXPECT_NE(workSizeInfoWithDpas.minWorkGroupSize, workSizeInfoWithoutDpas.minWorkGroupSize); -} - -DG2TEST_F(LocalWorkSizeTestDG2, givenKernelWithFusedEuDisabledAndSlmWhenWorkSizeInfoCalculatedThenMinWGSizeIsLessThanForKernelWithoutDpas) { - MockClDevice device{new MockDevice}; - MockKernelWithInternals kernel(device); - DispatchInfo dispatchInfo; - dispatchInfo.setClDevice(&device); - dispatchInfo.setKernel(kernel.mockKernel); - - auto threadsPerEu = defaultHwInfo->gtSystemInfo.ThreadCount / defaultHwInfo->gtSystemInfo.EUCount; - auto euPerSubSlice = defaultHwInfo->gtSystemInfo.ThreadCount / defaultHwInfo->gtSystemInfo.MaxEuPerSubSlice; - - auto &deviceInfo = device.sharedDeviceInfo; - deviceInfo.maxNumEUsPerSubSlice = euPerSubSlice; - deviceInfo.numThreadsPerEU = threadsPerEu; - kernel.mockKernel->slmTotalSize = 0x100; - - const_cast(kernel.mockKernel->getDescriptor()).kernelAttributes.flags.requiresDisabledEUFusion = true; - WorkSizeInfo workSizeInfoWithDpas = createWorkSizeInfoFromDispatchInfo(dispatchInfo); - - const_cast(kernel.mockKernel->getDescriptor()).kernelAttributes.flags.requiresDisabledEUFusion = false; - WorkSizeInfo workSizeInfoWithoutDpas = createWorkSizeInfoFromDispatchInfo(dispatchInfo); - EXPECT_NE(workSizeInfoWithDpas.minWorkGroupSize, workSizeInfoWithoutDpas.minWorkGroupSize); -} \ No newline at end of file diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index cb4d7ac24f..6a6610f743 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -723,4 +723,5 @@ template bool GfxCoreHelperHw::isRelaxedOrderingSupported() const { return false; } + } // namespace NEO diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index e6c989d86c..040bb83069 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -195,7 +195,6 @@ class ProductHelper { virtual uint32_t getDefaultRevisionId() const = 0; virtual bool isMultiContextResourceDeferDeletionSupported() const = 0; - virtual bool isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount) const = 0; virtual ~ProductHelper() = default; @@ -352,7 +351,6 @@ class ProductHelperHw : public ProductHelper { uint32_t getDefaultRevisionId() const override; bool isMultiContextResourceDeferDeletionSupported() const override; - bool isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount) const override; ~ProductHelperHw() override = default; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 878c84676e..486ce61baa 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -727,8 +727,4 @@ bool ProductHelperHw::isMultiContextResourceDeferDeletionSupported() return false; } -template -bool ProductHelperHw::isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount) const { - return false; -} } // namespace NEO diff --git a/shared/source/xe_hpg_core/dg2/os_agnostic_hw_info_config_dg2.inl b/shared/source/xe_hpg_core/dg2/os_agnostic_hw_info_config_dg2.inl index f9edc4976f..133a25bfba 100644 --- a/shared/source/xe_hpg_core/dg2/os_agnostic_hw_info_config_dg2.inl +++ b/shared/source/xe_hpg_core/dg2/os_agnostic_hw_info_config_dg2.inl @@ -235,21 +235,5 @@ template <> std::optional ProductHelperHw::getAubStreamProductFamily() const { return aub_stream::ProductFamily::Dg2; }; -template <> -bool ProductHelperHw::isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount) const { - if (!kernelHasDpasInstructions) { - return false; - } else if (lws == nullptr || groupCount == nullptr) { - return true; - } else if (size_t lwsCount = lws[0] * lws[1] * lws[2]; lwsCount > 1 && (lwsCount & 1) != 0) { - return true; - } else if (lwsCount > 1) { - return false; - } else if ((groupCount[0] & 1) != 0) { - return true; - } else { - return false; - } -} } // namespace NEO diff --git a/shared/source/xe_hpg_core/windows/hw_info_config_dg2.cpp b/shared/source/xe_hpg_core/windows/hw_info_config_dg2.cpp index 3e689e0043..017c215241 100644 --- a/shared/source/xe_hpg_core/windows/hw_info_config_dg2.cpp +++ b/shared/source/xe_hpg_core/windows/hw_info_config_dg2.cpp @@ -43,5 +43,6 @@ template <> bool ProductHelperHw::isMultiContextResourceDeferDeletionSupported() const { return true; } + template class ProductHelperHw; } // namespace NEO diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index 5f1775902b..f74885ba7f 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -1459,7 +1459,3 @@ HWTEST_F(ProductHelperCommonTest, givenPatIndexAndAllocationTypeWhenCallOverride patIndex = 3u; EXPECT_EQ(patIndex, gfxCoreHelper.overridePatIndex(allocationType, patIndex)); } -HWTEST_F(ProductHelperCommonTest, givenHwHelperWhenIsFusedEuDisabledForDpasCalledThenFalseReturned) { - auto &gfxCoreHelper = getHelper(); - EXPECT_FALSE(gfxCoreHelper.isFusedEuDisabledForDpas(true, nullptr, nullptr)); -} \ No newline at end of file diff --git a/shared/test/unit_test/xe_hpg_core/dg2/excludes_xe_hpg_core_dg2.cpp b/shared/test/unit_test/xe_hpg_core/dg2/excludes_xe_hpg_core_dg2.cpp index cbf82e94b2..0b25650a85 100644 --- a/shared/test/unit_test/xe_hpg_core/dg2/excludes_xe_hpg_core_dg2.cpp +++ b/shared/test/unit_test/xe_hpg_core/dg2/excludes_xe_hpg_core_dg2.cpp @@ -33,4 +33,3 @@ HWTEST_EXCLUDE_PRODUCT(XeHpgSbaTest, givenSpecificProductFamilyWhenAppendingSbaT HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, GivenZeroSlmSizeWhenComputeSlmSizeIsCalledThenCorrectValueIsReturned, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(ProductHelperTestXeHpgCore, givenProductHelperWhenCheckTimestampWaitSupportForEventsThenReturnFalse, IGFX_DG2); HWTEST_EXCLUDE_PRODUCT(CommandEncodeStatesTestDg2AndLater, GivenVariousSlmTotalSizesAndSettingRevIDToDifferentValuesWhenSetAdditionalInfoIsCalledThenCorrectValuesAreSet_IsXeHpgCore, IGFX_DG2); -HWTEST_EXCLUDE_PRODUCT(ProductHelperCommonTest, givenHwHelperWhenIsFusedEuDisabledForDpasCalledThenFalseReturned, IGFX_DG2); diff --git a/shared/test/unit_test/xe_hpg_core/dg2/product_config_helper_tests_dg2.cpp b/shared/test/unit_test/xe_hpg_core/dg2/product_config_helper_tests_dg2.cpp index 77db8a812f..2ff34b3482 100644 --- a/shared/test/unit_test/xe_hpg_core/dg2/product_config_helper_tests_dg2.cpp +++ b/shared/test/unit_test/xe_hpg_core/dg2/product_config_helper_tests_dg2.cpp @@ -6,9 +6,7 @@ */ #include "shared/source/helpers/product_config_helper.h" -#include "shared/source/os_interface/hw_info_config.h" #include "shared/source/xe_hpg_core/hw_cmds_dg2.h" -#include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/test_macros/header/per_product_test_definitions.h" #include "shared/test/common/test_macros/test.h" @@ -16,7 +14,6 @@ using namespace NEO; using ProductConfigHelperDg2Tests = ::testing::Test; -using ProductHelperTests = Test; DG2TEST_F(ProductConfigHelperDg2Tests, givenVariousVariantsOfXeHpgAcronymsWhenGetReleaseThenCorrectValueIsReturned) { std::vector acronymsVariants = {"xe_hpg_core", "xe_hpg", "xehpg", "XeHpg"}; @@ -32,55 +29,3 @@ DG2TEST_F(ProductConfigHelperDg2Tests, givenXeHpgReleaseWhenSearchForDeviceAcron auto aotInfos = productConfigHelper->getDeviceAotInfo(); EXPECT_TRUE(std::any_of(aotInfos.begin(), aotInfos.end(), ProductConfigHelper::findDeviceAcronymForRelease(AOT::XE_HPG_RELEASE))); } -DG2TEST_F(ProductHelperTests, givenNoDpasInstructionInKernelHelperWhenCheckingIfEuFusionShouldBeDisabledThenFalseReturned) { - auto &gfxCoreHelper = getHelper(); - const uint32_t lws[3] = {1, 1, 1}; - const uint32_t groupCount[3] = {5, 3, 1}; - bool dpasInstruction = false; - EXPECT_FALSE(gfxCoreHelper.isFusedEuDisabledForDpas(dpasInstruction, lws, groupCount)); -} -DG2TEST_F(ProductHelperTests, givenDpasInstructionLwsAndGroupCountIsNullPtrInKernelHelperWhenCheckingIfEuFusionShouldBeDisabledThenTrueReturned) { - auto &gfxCoreHelper = getHelper(); - bool dpasInstruction = true; - EXPECT_TRUE(gfxCoreHelper.isFusedEuDisabledForDpas(dpasInstruction, nullptr, nullptr)); -} -DG2TEST_F(ProductHelperTests, givenDpasInstructionLwsIsNullPtrInKernelHelperWhenCheckingIfEuFusionShouldBeDisabledThenTrueReturned) { - auto &gfxCoreHelper = getHelper(); - bool dpasInstruction = true; - const uint32_t groupCount[3] = {5, 3, 1}; - EXPECT_TRUE(gfxCoreHelper.isFusedEuDisabledForDpas(dpasInstruction, nullptr, groupCount)); -} -DG2TEST_F(ProductHelperTests, givenDpasInstructionGroupCountIsNullPtrInKernelHelperWhenCheckingIfEuFusionShouldBeDisabledThenTrueReturned) { - auto &gfxCoreHelper = getHelper(); - bool dpasInstruction = true; - const uint32_t lws[3] = {1, 1, 1}; - EXPECT_TRUE(gfxCoreHelper.isFusedEuDisabledForDpas(dpasInstruction, lws, nullptr)); -} -DG2TEST_F(ProductHelperTests, givenDpasInstructionLwsAndLwsIsOddWhenCheckingIfEuFusionShouldBeDisabledThenTrueReturned) { - auto &gfxCoreHelper = getHelper(); - const uint32_t lws[3] = {7, 3, 1}; - const uint32_t groupCount[3] = {2, 1, 1}; - bool dpasInstruction = true; - EXPECT_TRUE(gfxCoreHelper.isFusedEuDisabledForDpas(dpasInstruction, lws, groupCount)); -} -DG2TEST_F(ProductHelperTests, givenDpasInstructionLwsAndLwsIsNoOddWhenCheckingIfEuFusionShouldBeDisabledThenFalseReturned) { - auto &gfxCoreHelper = getHelper(); - const uint32_t lws[3] = {8, 3, 1}; - const uint32_t groupCount[3] = {2, 1, 1}; - bool dpasInstruction = true; - EXPECT_FALSE(gfxCoreHelper.isFusedEuDisabledForDpas(dpasInstruction, lws, groupCount)); -} -DG2TEST_F(ProductHelperTests, givenDpasInstructionLwsAndLwsIsOneAndXGroupCountIsOddWhenCheckingIfEuFusionShouldBeDisabledThenFalseReturned) { - auto &gfxCoreHelper = getHelper(); - const uint32_t lws[3] = {1, 1, 1}; - const uint32_t groupCount[3] = {5, 1, 1}; - bool dpasInstruction = true; - EXPECT_TRUE(gfxCoreHelper.isFusedEuDisabledForDpas(dpasInstruction, lws, groupCount)); -} -DG2TEST_F(ProductHelperTests, givenDpasInstructionLwsAndLwsIsOneAndXGroupCountIsNoOddWhenCheckingIfEuFusionShouldBeDisabledThenFalseReturned) { - auto &gfxCoreHelper = getHelper(); - const uint32_t lws[3] = {1, 1, 1}; - const uint32_t groupCount[3] = {4, 1, 1}; - bool dpasInstruction = true; - EXPECT_FALSE(gfxCoreHelper.isFusedEuDisabledForDpas(dpasInstruction, lws, groupCount)); -}