mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Disable overdispatch by default
When the disable-overdispatch feature is available:
- change the default value of CFE_STATE::ComputeOverdispatchDisable to true.
- change the default value of INTERFACE_DESCRIPTOR_DATA::ThreadGroupDispatchSize to 3u.

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
d3fd5077e7
commit
2dd0e67e65
@ -2007,14 +2007,17 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
|
||||
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo);
|
||||
|
||||
if (!containsAnyKernel) {
|
||||
requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), false, device->getHwInfo());
|
||||
requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, device->getHwInfo());
|
||||
finalStreamState = requiredStreamState;
|
||||
containsAnyKernel = true;
|
||||
}
|
||||
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), false, hwInfo);
|
||||
finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, hwInfo);
|
||||
if (finalStreamState.frontEndState.isDirty()) {
|
||||
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
|
||||
auto pVfeState = new VFE_STATE_TYPE;
|
||||
@ -2025,7 +2028,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
||||
|
||||
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
|
||||
auto &neoDevice = *device->getNEODevice();
|
||||
auto threadArbitrationPolicy = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily).getDefaultThreadArbitrationPolicy();
|
||||
auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
|
||||
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy);
|
||||
|
||||
NEO::EncodeWA<GfxFamily>::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true);
|
||||
|
@ -1292,5 +1292,31 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
|
||||
}
|
||||
}
|
||||
|
||||
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
|
||||
|
||||
// Checks that CommandListCoreFamily::updateStreamProperties() stores the
// platform's disable-overdispatch capability in frontEndState.disableOverdispatch
// of both requiredStreamState and finalStreamState, and that a second call
// leaves both values unchanged.
HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenRequiredStateAndFinalStateAreCorrectlySet, Platforms) {
|
||||
Mock<::L0::Kernel> kernel;
|
||||
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
|
||||
kernel.module = pMockModule.get();
|
||||
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
// Before any kernel is processed, both properties are expected to hold -1.
EXPECT_EQ(-1, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
|
||||
EXPECT_EQ(-1, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
|
||||
|
||||
// The expected value is derived from the same helper query the test targets,
// converted from bool to int32_t (0 or 1).
// NOTE(review): this queries *defaultHwInfo rather than the device's own
// hardware info - presumably identical in this fixture; confirm.
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
int32_t expectedDisableOverdispatch = hwHelper.isDisableOverdispatchAvailable(*defaultHwInfo);
|
||||
|
||||
// First call: both required and final state must pick up the expected value.
pCommandList->updateStreamProperties(kernel, false);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
|
||||
|
||||
// Second call with the same kernel: values must stay the same.
pCommandList->updateStreamProperties(kernel, false);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
@ -559,7 +559,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
UnifiedMemoryControls unifiedMemoryControls{};
|
||||
bool isUnifiedMemorySyncRequired = true;
|
||||
bool debugEnabled = false;
|
||||
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
|
||||
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
|
||||
|
||||
uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation;
|
||||
uint32_t maxKernelWorkGroupSize = 0;
|
||||
|
@ -5,8 +5,13 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/scratch_space_controller.h"
|
||||
#include "shared/source/command_stream/scratch_space_controller_base.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/unit_test/utilities/base_object_utils.h"
|
||||
|
||||
#include "opencl/source/event/user_event.h"
|
||||
@ -563,6 +568,46 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenForceEnableGpuIdleImplicitFlushThenExp
|
||||
EXPECT_TRUE(commandStreamReceiver->useGpuIdleImplicitFlush);
|
||||
}
|
||||
|
||||
// Checks the ComputeOverdispatchDisable field of the CFE_STATE command written
// by programVFEState() for two steppings (REVISION_A0 and REVISION_B):
//  - with default dispatch flags the field must equal
//    isDisableOverdispatchAvailable() for the current hardware info;
//  - with additionalKernelExecInfo == NotSet the field must be false.
HWTEST2_F(CommandStreamReceiverHwTest, whenProgramVFEStateIsCalledThenCorrectComputeOverdispatchDisableValueIsProgrammed, IsAtLeastXeHpCore) {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
|
||||
UltDeviceFactory deviceFactory{1, 0};
|
||||
auto pDevice = deviceFactory.rootDevices[0];
|
||||
// Mutable hardware info so the revision id can be changed per iteration.
auto pHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(pHwInfo->platform.eRenderCoreFamily);
|
||||
|
||||
// Backing storage for the command stream; reused by every programVFEState call.
uint8_t memory[1 * KB];
|
||||
auto mockCsr = std::make_unique<MockCsrHw2<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(),
|
||||
pDevice->getDeviceBitfield());
|
||||
MockOsContext osContext{0, 8, EngineTypeUsage{aub_stream::ENGINE_CCS, EngineUsage::Regular}, PreemptionMode::Disabled, false};
|
||||
mockCsr->setupContext(osContext);
|
||||
|
||||
uint32_t revisions[] = {REVISION_A0, REVISION_B};
|
||||
for (auto revision : revisions) {
|
||||
pHwInfo->platform.usRevId = hwHelper.getHwRevIdFromStepping(revision, *pHwInfo);
|
||||
|
||||
// Case 1: default dispatch flags.
{
|
||||
auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
// A fresh LinearStream over the same buffer for each case.
LinearStream commandStream{&memory, sizeof(memory)};
|
||||
// NOTE(review): presumably forces the VFE state to be (re)programmed - confirm.
mockCsr->mediaVfeStateDirty = true;
|
||||
mockCsr->programVFEState(commandStream, flags, 10);
|
||||
// NOTE(review): assumes CFE_STATE is the first command written into the
// buffer - confirm against getSpaceForVfeState for these platforms.
auto pCommand = reinterpret_cast<CFE_STATE *>(&memory);
|
||||
|
||||
auto expectedDisableOverdispatch = hwHelper.isDisableOverdispatchAvailable(*pHwInfo);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommand->getComputeOverdispatchDisable());
|
||||
}
|
||||
// Case 2: additionalKernelExecInfo explicitly set to NotSet - overdispatch
// disable must not be programmed regardless of stepping.
{
|
||||
auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
flags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
|
||||
LinearStream commandStream{&memory, sizeof(memory)};
|
||||
mockCsr->mediaVfeStateDirty = true;
|
||||
mockCsr->programVFEState(commandStream, flags, 10);
|
||||
auto pCommand = reinterpret_cast<CFE_STATE *>(&memory);
|
||||
EXPECT_FALSE(pCommand->getComputeOverdispatchDisable());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(BcsTests, WhenGetNumberOfBlitsForCopyPerRowIsCalledThenCorrectValuesAreReturned) {
|
||||
auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
|
||||
auto maxWidthToCopy = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitWidth(rootDeviceEnvironment));
|
||||
|
@ -1292,10 +1292,9 @@ HWTEST_F(HwHelperTest, givenHwHelperWhenIsBlitterForImagesSupportedIsCalledThenF
|
||||
EXPECT_FALSE(helper.isBlitterForImagesSupported(*defaultHwInfo));
|
||||
}
|
||||
|
||||
// Gen8-core test asserting that the hardware helper reports the feature as
// unavailable for *defaultHwInfo.
// NOTE(review): two test headers and two EXPECT_FALSE lines appear below; this
// looks like a rendered diff showing the pre-rename
// (additionalKernelExecInfoSupported) and post-rename
// (isDisableOverdispatchAvailable) versions together - confirm against the
// repository before treating both as live code.
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenReturnFalse) {
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) {
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
|
||||
EXPECT_FALSE(helper.additionalKernelExecInfoSupported(*defaultHwInfo));
|
||||
EXPECT_FALSE(helper.isDisableOverdispatchAvailable(*defaultHwInfo));
|
||||
}
|
||||
|
||||
TEST_F(HwHelperTest, WhenGettingIsCpuImageTransferPreferredThenFalseIsReturned) {
|
||||
|
@ -298,3 +298,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenXeHpCoreWhenIsBlitterForImagesSup
|
||||
auto &helper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
EXPECT_TRUE(helper.isBlitterForImagesSupported(hwInfo));
|
||||
}
|
||||
|
||||
// XE_HP core test: isDisableOverdispatchAvailable() must be false for an
// unmodified copy of the default hardware info and true once the revision id
// corresponds to stepping REVISION_B.
XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) {
|
||||
auto &hwHelper = HwHelper::get(renderCoreFamily);
|
||||
// Local copy, so the global default hardware info is not modified.
auto hwInfo = *defaultHwInfo;
|
||||
// NOTE(review): assumes the default revision id maps to a stepping below
// REVISION_B - confirm.
EXPECT_FALSE(hwHelper.isDisableOverdispatchAvailable(hwInfo));
|
||||
|
||||
hwInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(REVISION_B, hwInfo);
|
||||
EXPECT_TRUE(hwHelper.isDisableOverdispatchAvailable(hwInfo));
|
||||
}
|
||||
|
@ -42,35 +42,3 @@ XE_HP_CORE_TEST_F(CmdsProgrammingTestsXeHpCore, givenL1CachingOverrideWhenStateB
|
||||
|
||||
memoryManager->freeGraphicsMemory(allocation);
|
||||
}
|
||||
|
||||
// XE_HP core test: after adjustInterfaceDescriptorData() runs with the device
// revision id set to REVISION_B, ThreadGroupDispatchSize is expected to be 3.
// NOTE(review): the surrounding hunk header (35 lines -> 3 lines) suggests this
// test is removed by the commit - confirm.
XE_HP_CORE_TEST_F(CmdsProgrammingTestsXeHpCore, givenInterfaceDescriptorDataWhenBSteppingIsDetectedThenTGBatchSizeIsEqualTo3) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
// Start from the family's initial interface descriptor contents.
INTERFACE_DESCRIPTOR_DATA iddArg;
|
||||
iddArg = FamilyType::cmdInitInterfaceDescriptorData;
|
||||
|
||||
// The stepping constant is written directly into usRevId here (no
// getHwRevIdFromStepping translation).
pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId = REVISION_B;
|
||||
|
||||
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, pDevice->getHardwareInfo());
|
||||
EXPECT_EQ(3u, iddArg.getThreadGroupDispatchSize());
|
||||
}
|
||||
|
||||
using PreambleCfeState = PreambleFixture;
|
||||
|
||||
// XE_HP core test: with the default hardware info's revision id set to
// REVISION_B, programVfeState() must emit a CFE_STATE command whose
// ComputeOverdispatchDisable field is set.
// NOTE(review): the surrounding hunk header (35 lines -> 3 lines) suggests this
// test is removed by the commit - confirm.
XE_HP_CORE_TEST_F(PreambleCfeState, givenXehpBSteppingWhenCfeIsProgrammedThenOverdispatchIsDisabled) {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
|
||||
// Save the shared default revision id; it is restored manually on the last line.
auto backup = defaultHwInfo->platform.usRevId;
|
||||
defaultHwInfo->platform.usRevId = REVISION_B;
|
||||
|
||||
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute);
|
||||
StreamProperties streamProperties{};
|
||||
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, AdditionalKernelExecInfo::NotApplicable, streamProperties);
|
||||
// Parse the stream and locate the CFE_STATE command that was programmed.
parseCommands<FamilyType>(linearStream);
|
||||
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
|
||||
// NOTE(review): if this fatal assertion fails, the test presumably returns
// early and the revision id restore below is skipped - confirm.
ASSERT_NE(cmdList.end(), cfeStateIt);
|
||||
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
|
||||
|
||||
EXPECT_TRUE(cfeState->getComputeOverdispatchDisable());
|
||||
defaultHwInfo->platform.usRevId = backup;
|
||||
}
|
||||
|
@ -933,10 +933,11 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
auto engineGroupType = hwHelper.getEngineGroupType(getOsContext().getEngineType(), hwInfo);
|
||||
auto pVfeState = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csr, hwInfo, engineGroupType);
|
||||
auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo) &&
|
||||
(dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent,
|
||||
dispatchFlags.additionalKernelExecInfo == AdditionalKernelExecInfo::DisableOverdispatch,
|
||||
hwInfo);
|
||||
disableOverdispatch, hwInfo);
|
||||
PreambleHelper<GfxFamily>::programVfeState(
|
||||
pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(),
|
||||
maxFrontEndThreads, lastAdditionalKernelExecInfo, streamProperties);
|
||||
|
@ -134,7 +134,7 @@ class HwHelper {
|
||||
virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0;
|
||||
virtual void adjustAddressWidthForCanonize(uint32_t &addressWidth) const = 0;
|
||||
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isNewResidencyModelSupported() const = 0;
|
||||
@ -359,7 +359,7 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool isSipWANeeded(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
bool additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const override;
|
||||
bool isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
|
@ -462,7 +462,7 @@ inline bool HwHelperHw<GfxFamily>::isBlitCopyRequiredForLocalMemory(const Hardwa
|
||||
}
|
||||
|
||||
// Generic (non-specialized) implementation: always returns false, i.e. the
// capability is reported as unavailable unless a core family specializes it.
// NOTE(review): two signature lines appear below; this looks like a rendered
// diff showing the old name (additionalKernelExecInfoSupported) and the new
// name (isDisableOverdispatchAvailable) together - confirm.
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const {
|
||||
bool HwHelperHw<GfxFamily>::isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -44,9 +44,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {
|
||||
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
if (helper.getSteppingFromHwRevId(hwInfo) >= REVISION_B) {
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
if (hwHelper.isDisableOverdispatchAvailable(hwInfo)) {
|
||||
interfaceDescriptor.setThreadGroupDispatchSize(3u);
|
||||
}
|
||||
|
||||
|
@ -174,6 +174,11 @@ bool HwHelperHw<Family>::isBlitterForImagesSupported(const HardwareInfo &hwInfo)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Specialization for this core family: the capability is available when the
// stepping derived from the hardware revision id is REVISION_B or later.
template <>
|
||||
bool HwHelperHw<Family>::isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const {
|
||||
return (this->getSteppingFromHwRevId(hwInfo) >= REVISION_B);
|
||||
}
|
||||
|
||||
template <>
|
||||
void MemorySynchronizationCommands<Family>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
|
||||
using PIPE_CONTROL = typename Family::PIPE_CONTROL;
|
||||
|
@ -22,11 +22,6 @@ void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, c
|
||||
|
||||
command->setComputeOverdispatchDisable(streamProperties.frontEndState.disableOverdispatch.value == 1);
|
||||
|
||||
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
if (helper.getSteppingFromHwRevId(hwInfo) >= REVISION_B) {
|
||||
command->setComputeOverdispatchDisable(true);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.CFEComputeOverdispatchDisable.get() != -1) {
|
||||
command->setComputeOverdispatchDisable(DebugManager.flags.CFEComputeOverdispatchDisable.get());
|
||||
}
|
||||
|
@ -551,13 +551,23 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInterfaceDescriptorDa
|
||||
INTERFACE_DESCRIPTOR_DATA iddArg;
|
||||
iddArg = FamilyType::cmdInitInterfaceDescriptorData;
|
||||
const uint32_t forceThreadGroupDispatchSize = -1;
|
||||
const uint32_t defaultThreadGroupDispatchSize = iddArg.getThreadGroupDispatchSize();
|
||||
auto hwInfo = pDevice->getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(renderCoreFamily);
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.ForceThreadGroupDispatchSize.set(forceThreadGroupDispatchSize);
|
||||
|
||||
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, pDevice->getHardwareInfo());
|
||||
EXPECT_EQ(defaultThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize());
|
||||
uint32_t revisions[] = {REVISION_A0, REVISION_B};
|
||||
for (auto revision : revisions) {
|
||||
hwInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(revision, hwInfo);
|
||||
EncodeDispatchKernel<FamilyType>::adjustInterfaceDescriptorData(iddArg, hwInfo);
|
||||
|
||||
if (hwHelper.isDisableOverdispatchAvailable(hwInfo)) {
|
||||
EXPECT_EQ(3u, iddArg.getThreadGroupDispatchSize());
|
||||
} else {
|
||||
EXPECT_EQ(0u, iddArg.getThreadGroupDispatchSize());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInterfaceDescriptorDataWhenForceThreadGroupDispatchSizeVariableIsSetThenThreadGroupDispatchSizeIsChanged) {
|
||||
|
Reference in New Issue
Block a user