Disable overdispatch by default

When disable overdispatch is available:
- change default value of CFE_STATE::ComputeOverdispatchDisable to true.
- change default value of
INTERFACE_DESCRIPTOR_DATA::ThreadGroupDispatchSize to 3u.

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2021-07-29 18:21:14 +00:00
committed by Compute-Runtime-Automation
parent d3fd5077e7
commit 2dd0e67e65
14 changed files with 116 additions and 56 deletions

View File

@ -2007,14 +2007,17 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
auto &hwInfo = device->getHwInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo);
if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), false, device->getHwInfo());
requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, device->getHwInfo());
finalStreamState = requiredStreamState;
containsAnyKernel = true;
}
auto &hwInfo = device->getHwInfo();
finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), false, hwInfo);
finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, hwInfo);
if (finalStreamState.frontEndState.isDirty()) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
auto pVfeState = new VFE_STATE_TYPE;
@ -2025,7 +2028,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
auto &neoDevice = *device->getNEODevice();
auto threadArbitrationPolicy = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily).getDefaultThreadArbitrationPolicy();
auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy);
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true);

View File

@ -1292,5 +1292,31 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
}
}
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;

// Checks the front-end disableOverdispatch property tracked by a command list:
// after initialize() both the required and the final stream state hold -1, and
// every updateStreamProperties() call leaves both at the value reported by
// HwHelper::isDisableOverdispatchAvailable() for defaultHwInfo.
HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenRequiredStateAndFinalStateAreCorrectlySet, Platforms) {
    Mock<::L0::Kernel> kernel;
    auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto initStatus = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    auto &requiredFrontEnd = commandList->requiredStreamState.frontEndState;
    auto &finalFrontEnd = commandList->finalStreamState.frontEndState;

    // Nothing has been dispatched yet, so neither state carries a value.
    EXPECT_EQ(-1, requiredFrontEnd.disableOverdispatch.value);
    EXPECT_EQ(-1, finalFrontEnd.disableOverdispatch.value);

    auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
    int32_t expectedValue = hwHelper.isDisableOverdispatchAvailable(*defaultHwInfo);

    // The first call populates both states; the second call must not change them.
    for (auto call = 0; call < 2; ++call) {
        commandList->updateStreamProperties(kernel, false);
        EXPECT_EQ(expectedValue, requiredFrontEnd.disableOverdispatch.value);
        EXPECT_EQ(expectedValue, finalFrontEnd.disableOverdispatch.value);
    }
}
} // namespace ult
} // namespace L0

View File

@ -559,7 +559,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
UnifiedMemoryControls unifiedMemoryControls{};
bool isUnifiedMemorySyncRequired = true;
bool debugEnabled = false;
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation;
uint32_t maxKernelWorkGroupSize = 0;

View File

@ -5,8 +5,13 @@
*
*/
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/command_stream/scratch_space_controller_base.h"
#include "shared/source/helpers/constants.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
#include "opencl/source/event/user_event.h"
@ -563,6 +568,46 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenForceEnableGpuIdleImplicitFlushThenExp
EXPECT_TRUE(commandStreamReceiver->useGpuIdleImplicitFlush);
}
// Verifies the ComputeOverdispatchDisable field that programVFEState() writes
// into CFE_STATE, for hardware revisions derived from the A0 and B steppings:
//  - with default dispatch flags it must match
//    HwHelper::isDisableOverdispatchAvailable() for the current revision;
//  - with additionalKernelExecInfo == NotSet it must be false.
HWTEST2_F(CommandStreamReceiverHwTest, whenProgramVFEStateIsCalledThenCorrectComputeOverdispatchDisableValueIsProgrammed, IsAtLeastXeHpCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    UltDeviceFactory deviceFactory{1, 0};
    auto pDevice = deviceFactory.rootDevices[0];
    auto pHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
    auto &hwHelper = HwHelper::get(pHwInfo->platform.eRenderCoreFamily);

    uint8_t memory[1 * KB];
    auto mockCsr = std::make_unique<MockCsrHw2<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(),
                                                            pDevice->getDeviceBitfield());
    MockOsContext osContext{0, 8, EngineTypeUsage{aub_stream::ENGINE_CCS, EngineUsage::Regular}, PreemptionMode::Disabled, false};
    mockCsr->setupContext(osContext);

    // Programs the VFE state into a stream that starts at the beginning of
    // `memory` and returns the buffer start reinterpreted as CFE_STATE.
    auto programCfeState = [&](auto &dispatchFlags) {
        LinearStream commandStream{&memory, sizeof(memory)};
        mockCsr->mediaVfeStateDirty = true; // force the state to be re-programmed
        mockCsr->programVFEState(commandStream, dispatchFlags, 10);
        return reinterpret_cast<CFE_STATE *>(&memory);
    };

    const uint32_t steppings[] = {REVISION_A0, REVISION_B};
    for (auto stepping : steppings) {
        pHwInfo->platform.usRevId = hwHelper.getHwRevIdFromStepping(stepping, *pHwInfo);

        // Default dispatch flags: the programmed bit follows hardware availability.
        auto defaultFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
        auto pDefaultCommand = programCfeState(defaultFlags);
        const auto expectedDisableOverdispatch = hwHelper.isDisableOverdispatchAvailable(*pHwInfo);
        EXPECT_EQ(expectedDisableOverdispatch, pDefaultCommand->getComputeOverdispatchDisable());

        // NotSet: the bit must stay cleared for both revisions.
        auto notSetFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
        notSetFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
        auto pNotSetCommand = programCfeState(notSetFlags);
        EXPECT_FALSE(pNotSetCommand->getComputeOverdispatchDisable());
    }
}
HWTEST_F(BcsTests, WhenGetNumberOfBlitsForCopyPerRowIsCalledThenCorrectValuesAreReturned) {
auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
auto maxWidthToCopy = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitWidth(rootDeviceEnvironment));

View File

@ -1292,10 +1292,9 @@ HWTEST_F(HwHelperTest, givenHwHelperWhenIsBlitterForImagesSupportedIsCalledThenF
EXPECT_FALSE(helper.isBlitterForImagesSupported(*defaultHwInfo));
}
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenReturnFalse) {
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) {
auto &helper = HwHelper::get(renderCoreFamily);
EXPECT_FALSE(helper.additionalKernelExecInfoSupported(*defaultHwInfo));
EXPECT_FALSE(helper.isDisableOverdispatchAvailable(*defaultHwInfo));
}
TEST_F(HwHelperTest, WhenGettingIsCpuImageTransferPreferredThenFalseIsReturned) {

View File

@ -298,3 +298,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenXeHpCoreWhenIsBlitterForImagesSup
auto &helper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
EXPECT_TRUE(helper.isBlitterForImagesSupported(hwInfo));
}
// On XE_HP_CORE, isDisableOverdispatchAvailable() is expected to return false for
// the unmodified default hardware info and true once the revision id is set to
// the one that corresponds to the B stepping.
XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) {
    auto &helper = HwHelper::get(renderCoreFamily);

    // Copy by value so that changing the revision does not touch defaultHwInfo.
    auto testedHwInfo = *defaultHwInfo;
    EXPECT_FALSE(helper.isDisableOverdispatchAvailable(testedHwInfo));

    const auto revIdForBStepping = helper.getHwRevIdFromStepping(REVISION_B, testedHwInfo);
    testedHwInfo.platform.usRevId = revIdForBStepping;
    EXPECT_TRUE(helper.isDisableOverdispatchAvailable(testedHwInfo));
}

View File

@ -42,35 +42,3 @@ XE_HP_CORE_TEST_F(CmdsProgrammingTestsXeHpCore, givenL1CachingOverrideWhenStateB
memoryManager->freeGraphicsMemory(allocation);
}
// With the device's revision id set to REVISION_B, adjustInterfaceDescriptorData()
// is expected to leave the interface descriptor with a thread group dispatch
// size of 3.
XE_HP_CORE_TEST_F(CmdsProgrammingTestsXeHpCore, givenInterfaceDescriptorDataWhenBSteppingIsDetectedThenTGBatchSizeIsEqualTo3) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    auto pMutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
    pMutableHwInfo->platform.usRevId = REVISION_B;

    // Start from the family's initial descriptor contents.
    INTERFACE_DESCRIPTOR_DATA descriptor;
    descriptor = FamilyType::cmdInitInterfaceDescriptorData;

    EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(descriptor, pDevice->getHardwareInfo());

    EXPECT_EQ(3u, descriptor.getThreadGroupDispatchSize());
}
using PreambleCfeState = PreambleFixture;

// Programs CFE_STATE while defaultHwInfo's revision id is temporarily set to
// REVISION_B and expects ComputeOverdispatchDisable to be set in the parsed
// command.
XE_HP_CORE_TEST_F(PreambleCfeState, givenXehpBSteppingWhenCfeIsProgrammedThenOverdispatchIsDisabled) {
    using CFE_STATE = typename FamilyType::CFE_STATE;
    using RevIdType = decltype(defaultHwInfo->platform.usRevId);

    // The test modifies the global defaultHwInfo. ASSERT_NE below returns from
    // the test body on failure, so a restore placed on the last line would be
    // skipped; this guard puts the original revision id back on every exit path.
    struct RevIdRestorer {
        RevIdType &revId;
        const RevIdType original;
        ~RevIdRestorer() { revId = original; }
    } revIdRestorer{defaultHwInfo->platform.usRevId, defaultHwInfo->platform.usRevId};

    defaultHwInfo->platform.usRevId = REVISION_B;

    auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute);
    StreamProperties streamProperties{};
    PreambleHelper<FamilyType>::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, AdditionalKernelExecInfo::NotApplicable, streamProperties);

    parseCommands<FamilyType>(linearStream);
    auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cfeStateIt);

    auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
    EXPECT_TRUE(cfeState->getComputeOverdispatchDisable());
}

View File

@ -933,10 +933,11 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto engineGroupType = hwHelper.getEngineGroupType(getOsContext().getEngineType(), hwInfo);
auto pVfeState = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csr, hwInfo, engineGroupType);
auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo) &&
(dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent,
dispatchFlags.additionalKernelExecInfo == AdditionalKernelExecInfo::DisableOverdispatch,
hwInfo);
disableOverdispatch, hwInfo);
PreambleHelper<GfxFamily>::programVfeState(
pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(),
maxFrontEndThreads, lastAdditionalKernelExecInfo, streamProperties);

View File

@ -134,7 +134,7 @@ class HwHelper {
virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0;
virtual void adjustAddressWidthForCanonize(uint32_t &addressWidth) const = 0;
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
virtual bool additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const = 0;
virtual bool isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const = 0;
virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0;
virtual bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const = 0;
virtual bool isNewResidencyModelSupported() const = 0;
@ -359,7 +359,7 @@ class HwHelperHw : public HwHelper {
bool isSipWANeeded(const HardwareInfo &hwInfo) const override;
bool additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const override;
bool isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const override;
bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override;

View File

@ -462,7 +462,7 @@ inline bool HwHelperHw<GfxFamily>::isBlitCopyRequiredForLocalMemory(const Hardwa
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const {
bool HwHelperHw<GfxFamily>::isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const {
return false;
}

View File

@ -44,9 +44,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
template <>
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (helper.getSteppingFromHwRevId(hwInfo) >= REVISION_B) {
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (hwHelper.isDisableOverdispatchAvailable(hwInfo)) {
interfaceDescriptor.setThreadGroupDispatchSize(3u);
}

View File

@ -174,6 +174,11 @@ bool HwHelperHw<Family>::isBlitterForImagesSupported(const HardwareInfo &hwInfo)
return true;
}
// Family-specific override: disable-overdispatch is reported as available when
// the stepping derived from hwInfo's revision id is REVISION_B or higher.
template <>
bool HwHelperHw<Family>::isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const {
    const auto stepping = this->getSteppingFromHwRevId(hwInfo);
    return stepping >= REVISION_B;
}
template <>
void MemorySynchronizationCommands<Family>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
using PIPE_CONTROL = typename Family::PIPE_CONTROL;

View File

@ -22,11 +22,6 @@ void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, c
command->setComputeOverdispatchDisable(streamProperties.frontEndState.disableOverdispatch.value == 1);
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (helper.getSteppingFromHwRevId(hwInfo) >= REVISION_B) {
command->setComputeOverdispatchDisable(true);
}
if (DebugManager.flags.CFEComputeOverdispatchDisable.get() != -1) {
command->setComputeOverdispatchDisable(DebugManager.flags.CFEComputeOverdispatchDisable.get());
}

View File

@ -551,13 +551,23 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInterfaceDescriptorDa
INTERFACE_DESCRIPTOR_DATA iddArg;
iddArg = FamilyType::cmdInitInterfaceDescriptorData;
const uint32_t forceThreadGroupDispatchSize = -1;
const uint32_t defaultThreadGroupDispatchSize = iddArg.getThreadGroupDispatchSize();
auto hwInfo = pDevice->getHardwareInfo();
auto &hwHelper = HwHelper::get(renderCoreFamily);
DebugManagerStateRestore restorer;
DebugManager.flags.ForceThreadGroupDispatchSize.set(forceThreadGroupDispatchSize);
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, pDevice->getHardwareInfo());
EXPECT_EQ(defaultThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize());
uint32_t revisions[] = {REVISION_A0, REVISION_B};
for (auto revision : revisions) {
hwInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(revision, hwInfo);
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, hwInfo);
if (hwHelper.isDisableOverdispatchAvailable(hwInfo)) {
EXPECT_EQ(3u, iddArg.getThreadGroupDispatchSize());
} else {
EXPECT_EQ(0u, iddArg.getThreadGroupDispatchSize());
}
}
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInterfaceDescriptorDataWhenForceThreadGroupDispatchSizeVariableIsSetThenThreadGroupDispatchSizeIsChanged) {