mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
feature: adjust maxWorkGroupSize value
Related-To: NEO-7357
Signed-off-by: Rafal Maziejuk <rafal.maziejuk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6437c1a91e
commit
b9828b543e
@@ -747,7 +747,9 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
|
||||
memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE);
|
||||
memset(pKernelProperties->uuid.mid, 0, ZE_MAX_MODULE_UUID_SIZE);
|
||||
|
||||
const auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
|
||||
uint32_t maxKernelWorkGroupSize = static_cast<uint32_t>(this->module->getMaxGroupSize(kernelDescriptor));
|
||||
maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, maxKernelWorkGroupSize);
|
||||
pKernelProperties->maxNumSubgroups = maxKernelWorkGroupSize / kernelDescriptor.kernelAttributes.simdSize;
|
||||
|
||||
void *pNext = pKernelProperties->pNext;
|
||||
@@ -758,7 +760,6 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
|
||||
reinterpret_cast<ze_kernel_preferred_group_size_properties_t *>(extendedProperties);
|
||||
|
||||
preferredGroupSizeProperties->preferredMultiple = this->kernelImmData->getKernelInfo()->getMaxSimdSize();
|
||||
auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
|
||||
if (gfxCoreHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) {
|
||||
preferredGroupSizeProperties->preferredMultiple *= 2;
|
||||
}
|
||||
|
||||
@@ -319,15 +319,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenSignalEventWhenAppendLaunchIndirec
|
||||
context->freeMem(alloc);
|
||||
}
|
||||
|
||||
// Matcher type exposing a static, compile-time isMatched<productFamily>() predicate; it is
// passed as the last argument of the HWTEST2_F declared right after this struct.
// NOTE(review): this text is a rendered commit diff without +/- markers; the hunk header
// (-319,15 +319,7) suggests this struct is on the removed side of the change — confirm
// against the repository before relying on it.
struct ProgramChangedFieldsInComputeMode {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
// IGFX_BROADWELL never matches, regardless of the trait consulted below.
if (productFamily == IGFX_BROADWELL)
|
||||
return false;
|
||||
// Every other product reports its core family's programOnlyChangedFieldsInComputeStateMode trait.
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::programOnlyChangedFieldsInComputeStateMode;
|
||||
}
|
||||
};
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStreamPropertiesIsCalledTwiceThenChangedFieldsAreDirty, ProgramChangedFieldsInComputeMode) {
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStreamPropertiesIsCalledTwiceThenChangedFieldsAreDirty, IsAtLeastGen12lp) {
|
||||
DebugManagerStateRestore restorer;
|
||||
auto &productHelper = device->getProductHelper();
|
||||
|
||||
@@ -360,7 +352,9 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStr
|
||||
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
|
||||
EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
if constexpr (TestTraits<gfxCoreFamily>::largeGrfModeInStateComputeModeSupported) {
|
||||
EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
}
|
||||
if (productHelper.getScmPropertyCoherencyRequiredSupport()) {
|
||||
EXPECT_EQ(0, commandList->finalStreamState.stateComputeMode.isCoherencyRequired.value);
|
||||
} else {
|
||||
|
||||
@@ -2252,11 +2252,14 @@ bool Kernel::areMultipleSubDevicesInContext() const {
|
||||
}
|
||||
|
||||
// Re-derives per-kernel state from kernelInfo.kernelDescriptor: the maximum work-group size,
// the stateless-writes flag and the systolic pipeline-select flag.
// NOTE(review): this text is a rendered commit diff without +/- markers; the hunk header
// (-2252,11 +2252,14) suggests each adjacent pair of near-identical statements below is the
// removed line followed by its replacement, not code that executes twice — confirm against
// the repository.
void Kernel::reconfigureKernel() {
|
||||
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||
const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
|
||||
// Halve the limit when the kernel requires GrfConfig::LargeGrfNumber registers and its
// SIMD size is anything other than 32.
if (kernelDescriptor.kernelAttributes.numGrfRequired == GrfConfig::LargeGrfNumber &&
|
||||
kernelDescriptor.kernelAttributes.simdSize != 32) {
|
||||
maxKernelWorkGroupSize >>= 1;
|
||||
this->maxKernelWorkGroupSize >>= 1;
|
||||
}
|
||||
const auto &gfxCoreHelper = getDevice().getGfxCoreHelper();
|
||||
// Pass the current limit through the device's GfxCoreHelper, together with this kernel's
// GRF count and SIMD size, and store whatever it returns.
this->maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, this->maxKernelWorkGroupSize);
|
||||
|
||||
// Mirror two descriptor flags into kernel members.
this->containsStatelessWrites = kernelDescriptor.kernelAttributes.flags.usesStatelessWrites;
|
||||
this->systolicPipelineSelectMode = kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode;
|
||||
}
|
||||
|
||||
@@ -3303,7 +3303,7 @@ HWTEST_F(KernelLargeGrfTests, GivenLargeGrfAndSimdSizeWhenGettingMaxWorkGroupSiz
|
||||
{
|
||||
MockKernel kernel(program.get(), *pKernelInfo, *pClDevice);
|
||||
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber - 1;
|
||||
pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
EXPECT_EQ(CL_SUCCESS, kernel.initialize());
|
||||
EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, *kernel.maxWorkGroupSizeForCrossThreadData);
|
||||
EXPECT_EQ(pDevice->getDeviceInfo().maxWorkGroupSize, kernel.maxKernelWorkGroupSize);
|
||||
|
||||
@@ -143,6 +143,9 @@ void Device::initializeCaps() {
|
||||
maxWS = Math::prevPowerOfTwo(maxWS);
|
||||
deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u);
|
||||
|
||||
const auto minGrfSize = gfxCoreHelper.getMinimalGrfSize();
|
||||
deviceInfo.maxWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(minGrfSize, simdSizeUsed, static_cast<uint32_t>(deviceInfo.maxWorkGroupSize));
|
||||
|
||||
if (DebugManager.flags.OverrideMaxWorkgroupSize.get() != -1) {
|
||||
deviceInfo.maxWorkGroupSize = DebugManager.flags.OverrideMaxWorkgroupSize.get();
|
||||
}
|
||||
|
||||
@@ -103,6 +103,7 @@ class GfxCoreHelper {
|
||||
virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0;
|
||||
virtual bool isAdditionalFeatureFlagRequired(const FeatureTable *featureTable) const = 0;
|
||||
virtual uint32_t getMinimalSIMDSize() const = 0;
|
||||
virtual uint32_t getMinimalGrfSize() const = 0;
|
||||
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const = 0;
|
||||
virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const = 0;
|
||||
virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0;
|
||||
@@ -118,6 +119,7 @@ class GfxCoreHelper {
|
||||
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
||||
virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment, bool isEngineInstanced) const = 0;
|
||||
virtual uint32_t adjustMaxWorkGroupSize(const uint32_t numGrf, const uint32_t simd, const uint32_t defaultMaxGroupSize) const = 0;
|
||||
virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
|
||||
virtual size_t getSipKernelMaxDbgSurfaceSize(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
|
||||
@@ -293,6 +295,8 @@ class GfxCoreHelperHw : public GfxCoreHelper {
|
||||
|
||||
uint32_t getMinimalSIMDSize() const override;
|
||||
|
||||
uint32_t getMinimalGrfSize() const override;
|
||||
|
||||
uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const override;
|
||||
|
||||
uint32_t getGlobalTimeStampBits() const override;
|
||||
@@ -316,6 +320,8 @@ class GfxCoreHelperHw : public GfxCoreHelper {
|
||||
uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment, bool isEngineInstanced) const override;
|
||||
|
||||
uint32_t adjustMaxWorkGroupSize(const uint32_t numGrf, const uint32_t simd, const uint32_t defaultMaxGroupSize) const override;
|
||||
|
||||
size_t getMaxFillPaternSizeForCopyEngine() const override;
|
||||
|
||||
size_t getSipKernelMaxDbgSurfaceSize(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
@@ -675,4 +675,14 @@ template <typename gfxProduct>
|
||||
bool GfxCoreHelperHw<gfxProduct>::isRelaxedOrderingSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Generic GfxCoreHelperHw implementation of adjustMaxWorkGroupSize.
// Returns defaultMaxGroupSize unchanged; numGrf and simd are accepted but not read here.
// NOTE(review): presumably specific core families specialize this template to apply a real
// limit — no such specialization is visible in this excerpt.
template <typename GfxFamily>
|
||||
uint32_t GfxCoreHelperHw<GfxFamily>::adjustMaxWorkGroupSize(const uint32_t numGrf, const uint32_t simd, const uint32_t defaultMaxGroupSize) const {
|
||||
return defaultMaxGroupSize;
|
||||
}
|
||||
|
||||
// Generic GfxCoreHelperHw implementation of getMinimalGrfSize: always returns 128.
// Device::initializeCaps (elsewhere in this diff) passes this value as the numGrf argument
// of adjustMaxWorkGroupSize when computing deviceInfo.maxWorkGroupSize.
template <typename GfxFamily>
|
||||
uint32_t GfxCoreHelperHw<GfxFamily>::getMinimalGrfSize() const {
|
||||
return 128u;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -19,4 +19,5 @@ struct TestTraits<IGFX_GEN12LP_CORE> {
|
||||
static constexpr bool implementsPreambleThreadArbitration = false;
|
||||
static constexpr bool forceGpuNonCoherent = true;
|
||||
static constexpr bool imagesSupported = true;
|
||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||
};
|
||||
|
||||
@@ -29,4 +29,5 @@ struct TestTraits<IGFX_XE_HPC_CORE> {
|
||||
static constexpr bool isUnTypedDataPortCacheFlushSupported = true;
|
||||
static constexpr bool imagesSupported = false;
|
||||
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = true;
|
||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||
};
|
||||
|
||||
@@ -29,4 +29,5 @@ struct TestTraits<IGFX_XE_HPG_CORE> {
|
||||
static constexpr bool isUnTypedDataPortCacheFlushSupported = true;
|
||||
static constexpr bool imagesSupported = true;
|
||||
static constexpr bool isPipeControlExtendedPriorToNonPipelinedStateCommandSupported = false;
|
||||
static constexpr bool largeGrfModeInStateComputeModeSupported = true;
|
||||
};
|
||||
|
||||
@@ -15,8 +15,11 @@
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "test_traits_common.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
struct MockStateComputeModeProperties : public StateComputeModeProperties {
|
||||
@@ -40,6 +43,8 @@ struct MockStateBaseAddressProperties : public StateBaseAddressProperties {
|
||||
using StateBaseAddressProperties::stateBaseAddressPropertiesSupport;
|
||||
};
|
||||
|
||||
using StreamPropertiesTests = ::testing::Test;
|
||||
|
||||
TEST(StreamPropertiesTests, whenPropertyValueIsChangedThenProperStateIsSet) {
|
||||
NEO::StreamProperty streamProperty;
|
||||
|
||||
@@ -108,7 +113,7 @@ TEST(StreamPropertiesTests, whenSettingCooperativeKernelPropertiesThenCorrectVal
|
||||
}
|
||||
}
|
||||
|
||||
TEST(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValuesAreSet) {
|
||||
HWTEST2_F(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValuesAreSet, IsAtLeastGen12lp) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.ForceGrfNumProgrammingWithScm.set(1);
|
||||
DebugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1);
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
|
||||
#include "shared/test/unit_test/mocks/mock_dispatch_kernel_encoder_interface.h"
|
||||
|
||||
#include "test_traits_common.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
using namespace NEO;
|
||||
@@ -520,7 +522,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDispatchInterfaceWhenNumRequiredGrfIsNotDefaultThenStateComputeModeCommandAdded) {
|
||||
HWTEST2_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenNumRequiredGrfIsNotDefaultThenStateComputeModeCommandAdded, IsAtLeastGen12lp) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.ForceGrfNumProgrammingWithScm.set(1);
|
||||
|
||||
|
||||
@@ -927,10 +927,15 @@ HWCMDTEST_F(IGFX_GEN8_CORE, GfxCoreHelperTest, givenDefaultGfxCoreHelperHwWhenIs
|
||||
}
|
||||
|
||||
// Verifies that the GfxCoreHelper obtained from the fixture reports a minimal SIMD size of 8.
// NOTE(review): this text is a rendered commit diff without +/- markers; the two
// gfxCoreHelper declarations below look like the removed line and its const-qualified
// replacement (hunk header -927,10 +927,15) — confirm against the repository.
HWTEST_F(GfxCoreHelperTest, givenDefaultGfxCoreHelperHwWhenMinimalSIMDSizeIsQueriedThen8IsReturned) {
|
||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
EXPECT_EQ(8u, gfxCoreHelper.getMinimalSIMDSize());
|
||||
}
|
||||
|
||||
// Verifies that the GfxCoreHelper obtained from the fixture reports a minimal GRF size of 128.
HWTEST_F(GfxCoreHelperTest, givenDefaultGfxCoreHelperHwWhenMinimalGrfSizeIsQueriedThen128IsReturned) {
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
EXPECT_EQ(128u, gfxCoreHelper.getMinimalGrfSize());
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, GfxCoreHelperTest, WhenIsFusedEuDispatchEnabledIsCalledThenFalseIsReturned) {
|
||||
if (hardwareInfo.platform.eRenderCoreFamily == IGFX_GEN12LP_CORE) {
|
||||
GTEST_SKIP();
|
||||
@@ -1563,3 +1568,20 @@ HWTEST_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustM
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that adjustMaxWorkGroupSize hands back the supplied default (1024) unchanged for
// three GRF-count / SIMD-size combinations.
HWTEST_F(GfxCoreHelperTest, givenNumGrfAndSimdSizeWhenAdjustingMaxWorkGroupSizeThenAlwaysReturnDeviceDefault) {
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
constexpr auto defaultMaxGroupSize = 1024u;
|
||||
|
||||
// Case 1: large GRF count with SIMD16.
uint32_t simdSize = 16u;
|
||||
uint32_t numGrfRequired = GrfConfig::LargeGrfNumber;
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize));
|
||||
|
||||
// Case 2: large GRF count with SIMD32.
simdSize = 32u;
|
||||
numGrfRequired = GrfConfig::LargeGrfNumber;
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize));
|
||||
|
||||
// Case 3: default GRF count with SIMD16.
simdSize = 16u;
|
||||
numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
EXPECT_EQ(defaultMaxGroupSize, gfxCoreHelper.adjustMaxWorkGroupSize(numGrfRequired, simdSize, defaultMaxGroupSize));
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "test_traits_common.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
@@ -506,7 +507,7 @@ HWTEST_F(ProductHelperTest, givenSamplerStateWhenAdjustSamplerStateThenNothingIs
|
||||
EXPECT_EQ(0, memcmp(&initialState, &state, sizeof(SAMPLER_STATE)));
|
||||
}
|
||||
|
||||
HWTEST_F(ProductHelperTest, WhenFillingScmPropertiesSupportThenExpectUseCorrectGetters) {
|
||||
HWTEST2_F(ProductHelperTest, WhenFillingScmPropertiesSupportThenExpectUseCorrectGetters, IsAtLeastGen12lp) {
|
||||
StateComputeModePropertiesSupport scmPropertiesSupport = {};
|
||||
|
||||
productHelper->fillScmPropertiesSupportStructure(scmPropertiesSupport);
|
||||
@@ -515,8 +516,10 @@ HWTEST_F(ProductHelperTest, WhenFillingScmPropertiesSupportThenExpectUseCorrectG
|
||||
EXPECT_EQ(productHelper->getScmPropertyCoherencyRequiredSupport(), scmPropertiesSupport.coherencyRequired);
|
||||
EXPECT_EQ(productHelper->getScmPropertyZPassAsyncComputeThreadLimitSupport(), scmPropertiesSupport.zPassAsyncComputeThreadLimit);
|
||||
EXPECT_EQ(productHelper->getScmPropertyPixelAsyncComputeThreadLimitSupport(), scmPropertiesSupport.pixelAsyncComputeThreadLimit);
|
||||
EXPECT_EQ(productHelper->isGrfNumReportedWithScm(), scmPropertiesSupport.largeGrfMode);
|
||||
EXPECT_EQ(productHelper->getScmPropertyDevicePreemptionModeSupport(), scmPropertiesSupport.devicePreemptionMode);
|
||||
if constexpr (TestTraits<gfxCoreFamily>::largeGrfModeInStateComputeModeSupported) {
|
||||
EXPECT_EQ(productHelper->isGrfNumReportedWithScm(), scmPropertiesSupport.largeGrfMode);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(ProductHelperTest, WhenFillingFrontEndPropertiesSupportThenExpectUseCorrectGetters) {
|
||||
|
||||
Reference in New Issue
Block a user