Limit multiple partition count to compute command lists

Related-To: NEO-6811

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-03-28 13:48:41 +00:00
committed by Compute-Runtime-Automation
parent 90c6d7d9c9
commit 9858438121
18 changed files with 52 additions and 57 deletions

View File

@@ -127,12 +127,6 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati
}
}
bool CommandList::isCopyOnly() const {
    // Pick the HwHelper that matches this device's render core family and let
    // it classify the engine group this command list was created for.
    const auto renderCoreFamily = device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    return NEO::HwHelper::get(renderCoreFamily).isCopyOnlyEngineType(engineGroupType);
}
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor());
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);

View File

@@ -209,7 +209,9 @@ struct CommandList : _ze_command_list_handle_t {
void removeHostPtrAllocations();
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
bool isCopyOnly() const;
// Reports whether NEO::EngineHelper classifies this command list's engine
// group as copy-only.
bool isCopyOnly() const {
    const bool copyOnlyGroup = NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType);
    return copyOnlyGroup;
}
// Returns the value of the internalUsage flag stored on this command list.
bool isInternal() const {
return internalUsage;
}
@@ -269,11 +271,11 @@ struct CommandList : _ze_command_list_handle_t {
NEO::StreamProperties requiredStreamState{};
NEO::StreamProperties finalStreamState{};
CommandsToPatch commandsToPatch{};
ze_command_list_flags_t flags = 0u;
UnifiedMemoryControls unifiedMemoryControls;
ze_command_list_flags_t flags = 0u;
NEO::EngineGroupType engineGroupType;
bool indirectAllocationsAllowed = false;
bool internalUsage = false;
bool containsCooperativeKernelsFlag = false;

View File

@@ -90,12 +90,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
clearCommandsToPatch();
commandListSLMEnabled = false;
if (device->isImplicitScalingCapable() && !this->internalUsage) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
} else {
this->partitionCount = 1;
}
if (!isCopyOnly()) {
if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) {
programStateBaseAddress(commandContainer, false);
@@ -120,7 +114,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->engineGroupType = engineGroupType;
this->flags = flags;
if (device->isImplicitScalingCapable() && !this->internalUsage) {
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
}

View File

@@ -180,8 +180,7 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
auto &hwHelper = NEO::HwHelper::get(platform.eRenderCoreFamily);
bool isCopyOnly = hwHelper.isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType);
bool isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType);
if (desc->priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW && !isCopyOnly) {
getCsrForLowPriority(&csr);

View File

@@ -46,7 +46,7 @@ class CommandListFixture : public DeviceFixture {
std::unique_ptr<Event> event;
};
template <bool createImmediate, bool createInternal>
template <bool createImmediate, bool createInternal, bool createCopy>
struct MultiTileCommandListFixture : public SingleRootMultiSubDeviceFixture {
void SetUp() {
DebugManager.flags.EnableImplicitScaling.set(1);
@@ -55,11 +55,14 @@ struct MultiTileCommandListFixture : public SingleRootMultiSubDeviceFixture {
SingleRootMultiSubDeviceFixture::SetUp();
ze_result_t returnValue;
NEO::EngineGroupType cmdListEngineType = createCopy ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::RenderCompute;
if (!createImmediate) {
commandList.reset(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
commandList.reset(whitebox_cast(CommandList::create(productFamily, device, cmdListEngineType, 0u, returnValue)));
} else {
const ze_command_queue_desc_t desc = {};
commandList.reset(whitebox_cast(CommandList::createImmediate(productFamily, device, &desc, createInternal, NEO::EngineGroupType::RenderCompute, returnValue)));
commandList.reset(whitebox_cast(CommandList::createImmediate(productFamily, device, &desc, createInternal, cmdListEngineType, returnValue)));
}
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

View File

@@ -14,7 +14,7 @@
namespace L0 {
namespace ult {
using MultiTileImmediateCommandListTest = Test<MultiTileCommandListFixture<true, false>>;
using MultiTileImmediateCommandListTest = Test<MultiTileCommandListFixture<true, false, false>>;
HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImmediateCommandListThenExpectPartitionCountMatchTileCount, IsWithinXeGfxFamily) {
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
@@ -25,7 +25,7 @@ HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImm
EXPECT_EQ(2u, commandList->partitionCount);
}
using MultiTileImmediateInternalCommandListTest = Test<MultiTileCommandListFixture<true, true>>;
using MultiTileImmediateInternalCommandListTest = Test<MultiTileCommandListFixture<true, true, false>>;
HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCreatingInternalImmediateCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) {
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
@@ -36,6 +36,17 @@ HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCre
EXPECT_EQ(1u, commandList->partitionCount);
}
// Fixture arguments are <createImmediate, createInternal, createCopy>: this alias
// builds a regular (non-immediate), non-internal command list on the Copy engine group.
using MultiTileCopyEngineCommandListTest = Test<MultiTileCommandListFixture<false, false, true>>;
// A copy-engine command list must report partitionCount == 1 even though the
// device bitfield has two bits set, both after creation and after reset().
HWTEST2_F(MultiTileCopyEngineCommandListTest, GivenMultiTileDeviceWhenCreatingCopyEngineCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) {
// Precondition: the device bitfield has two bits set.
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
EXPECT_EQ(1u, commandList->partitionCount);
auto returnValue = commandList->reset();
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
// reset() must leave the partition count at one.
EXPECT_EQ(1u, commandList->partitionCount);
}
using CommandListExecuteImmediate = Test<DeviceFixture>;
HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlushTaskThenRequiredStreamStateIsCorrectlyReported, IsAtLeastSkl) {
auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);

View File

@@ -80,7 +80,7 @@ HWTEST_F(CommandListAppendBarrier, GivenEventVsNoEventWhenAppendingBarrierThenCo
ASSERT_LE(sizeWithoutEvent, sizeWithEvent);
}
using MultiTileCommandListAppendBarrier = Test<MultiTileCommandListFixture<false, false>>;
using MultiTileCommandListAppendBarrier = Test<MultiTileCommandListFixture<false, false, false>>;
HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsWithinXeGfxFamily) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

View File

@@ -142,7 +142,7 @@ HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeContro
EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses());
}
using MultiTileCommandListTests = Test<MultiTileCommandListFixture<false, false>>;
using MultiTileCommandListTests = Test<MultiTileCommandListFixture<false, false, false>>;
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListTests, givenPartitionedCommandListWhenCommandListIsCreatedThenStateBaseAddressCmdWithMultiPartialAndAtomicsCorrectlyProgrammed) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

View File

@@ -137,7 +137,7 @@ HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomics
EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics);
}
using MultTileCommandListAppendLaunchKernelL3Flush = Test<MultiTileCommandListFixture<false, false>>;
using MultTileCommandListAppendLaunchKernelL3Flush = Test<MultiTileCommandListFixture<false, false, false>>;
HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

View File

@@ -215,11 +215,10 @@ cl_command_queue_capabilities_intel ClDevice::getQueueFamilyCapabilitiesAll() {
}
cl_command_queue_capabilities_intel ClDevice::getQueueFamilyCapabilities(EngineGroupType type) {
auto &hwHelper = NEO::HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily);
auto &clHwHelper = NEO::ClHwHelper::get(getHardwareInfo().platform.eRenderCoreFamily);
cl_command_queue_capabilities_intel disabledProperties = 0u;
if (hwHelper.isCopyOnlyEngineType(type)) {
if (EngineHelper::isCopyOnlyEngineType(type)) {
disabledProperties |= static_cast<cl_command_queue_capabilities_intel>(CL_QUEUE_CAPABILITY_KERNEL_INTEL);
disabledProperties |= static_cast<cl_command_queue_capabilities_intel>(CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL); // clEnqueueFillBuffer
disabledProperties |= static_cast<cl_command_queue_capabilities_intel>(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL); // clEnqueueCopyImage

View File

@@ -951,7 +951,7 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
const HardwareInfo &hwInfo = getDevice().getHardwareInfo();
const HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
const EngineGroupType engineGroupType = hwHelper.getEngineGroupType(engineType, engineUsage, hwInfo);
const bool isEngineCopyOnly = hwHelper.isCopyOnlyEngineType(engineGroupType);
const bool isEngineCopyOnly = EngineHelper::isCopyOnlyEngineType(engineGroupType);
if (isEngineCopyOnly) {
std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr);

View File

@@ -1474,4 +1474,16 @@ HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndSizeCalledThenCorrectSize
// Checks that getBatchBufferEndReference() returns the address of the family's
// cmdInitBatchBufferEnd object.
HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndReferenceCalledThenCorrectPtrReturned) {
const auto &hwHelper = HwHelper::get(renderCoreFamily);
EXPECT_EQ(hwHelper.getBatchBufferEndReference(), reinterpret_cast<const void *>(&FamilyType::cmdInitBatchBufferEnd));
}
}
// EngineHelper must classify EngineGroupType::Copy as copy-only.
HWTEST_F(HwHelperTest, givenHwHelperWhenPassingCopyEngineTypeThenItsCopyOnly) {
EXPECT_TRUE(EngineHelper::isCopyOnlyEngineType(EngineGroupType::Copy));
}
// EngineHelper must classify EngineGroupType::LinkedCopy as copy-only.
HWTEST_F(HwHelperTest, givenHwHelperWhenPassingLinkedCopyEngineTypeThenItsCopyOnly) {
EXPECT_TRUE(EngineHelper::isCopyOnlyEngineType(EngineGroupType::LinkedCopy));
}
// EngineHelper must not classify EngineGroupType::Compute as copy-only.
HWTEST_F(HwHelperTest, givenHwHelperWhenPassingComputeEngineTypeThenItsNotCopyOnly) {
EXPECT_FALSE(EngineHelper::isCopyOnlyEngineType(EngineGroupType::Compute));
}

View File

@@ -854,18 +854,6 @@ XE_HPC_CORETEST_F(LriHelperTestsXeHpcCore, whenProgrammingLriCommandThenExpectMm
EXPECT_TRUE(memcmp(lri, &expectedLri, sizeof(MI_LOAD_REGISTER_IMM)) == 0);
}
// The HwHelper obtained for renderCoreFamily must classify EngineGroupType::Copy as copy-only.
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperWhenPassingCopyEngineTypeThenItsCopyOnly) {
auto &helper = HwHelper::get(renderCoreFamily);
EXPECT_TRUE(helper.isCopyOnlyEngineType(EngineGroupType::Copy));
}
// The HwHelper obtained for renderCoreFamily must classify EngineGroupType::LinkedCopy as copy-only.
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperWhenPassingLinkedCopyEngineTypeThenItsCopyOnly) {
auto &helper = HwHelper::get(renderCoreFamily);
EXPECT_TRUE(helper.isCopyOnlyEngineType(EngineGroupType::LinkedCopy));
}
// The HwHelper obtained for renderCoreFamily must not classify EngineGroupType::Compute as copy-only.
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperWhenPassingComputeEngineTypeThenItsNotCopyOnly) {
auto &helper = HwHelper::get(renderCoreFamily);
EXPECT_FALSE(helper.isCopyOnlyEngineType(EngineGroupType::Compute));
}
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCccsDisabledWhenGetGpgpuEnginesCalledThenDontSetCccs) {
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrCCSNode = true;

View File

@@ -311,7 +311,7 @@ void Device::addEngineToEngineGroup(EngineControl &engine) {
return;
}
if (hwHelper.isCopyOnlyEngineType(engineGroupType) && DebugManager.flags.EnableBlitterOperationsSupport.get() == 0) {
if (EngineHelper::isCopyOnlyEngineType(engineGroupType) && DebugManager.flags.EnableBlitterOperationsSupport.get() == 0) {
return;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -20,4 +20,10 @@ enum class EngineGroupType : uint32_t {
MaxEngineGroups
};
// Static helpers for classifying engine groups.
struct EngineHelper {
    // Returns true when `type` is EngineGroupType::Copy or
    // EngineGroupType::LinkedCopy, and false for every other engine group.
    static bool isCopyOnlyEngineType(EngineGroupType type) {
        if (type == EngineGroupType::Copy) {
            return true;
        }
        return type == EngineGroupType::LinkedCopy;
    }
};
} // namespace NEO

View File

@@ -115,7 +115,6 @@ class HwHelper {
const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0;
virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
virtual size_t getSipKernelMaxDbgSurfaceSize(const HardwareInfo &hwInfo) const = 0;
virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0;
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0;
virtual bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const = 0;
@@ -344,8 +343,6 @@ class HwHelperHw : public HwHelper {
bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const override;
bool isCopyOnlyEngineType(EngineGroupType type) const override;
bool isSipWANeeded(const HardwareInfo &hwInfo) const override;
bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override;

View File

@@ -632,11 +632,6 @@ bool HwHelperHw<GfxFamily>::isCooperativeEngineSupported(const HardwareInfo &hwI
return false;
}
// Generic implementation: only EngineGroupType::Copy is reported as copy-only.
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isCopyOnlyEngineType(EngineGroupType type) const {
    const bool isCopyGroup = (type == NEO::EngineGroupType::Copy);
    return isCopyGroup;
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isSipWANeeded(const HardwareInfo &hwInfo) const {
return false;

View File

@@ -377,11 +377,6 @@ int32_t HwHelperHw<Family>::getDefaultThreadArbitrationPolicy() const {
return ThreadArbitrationPolicy::RoundRobinAfterDependency;
}
// Specialization for this Family: both the Copy and LinkedCopy engine groups
// are reported as copy-only; every other group is not.
template <>
bool HwHelperHw<Family>::isCopyOnlyEngineType(EngineGroupType type) const {
    switch (type) {
    case EngineGroupType::Copy:
    case EngineGroupType::LinkedCopy:
        return true;
    default:
        return false;
    }
}
template <>
bool HwHelperHw<Family>::isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const {
constexpr uint64_t tile1Bitfield = 0b10;