mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 16:24:18 +08:00
Limit multiple partition count to compute command lists
Related-To: NEO-6811 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
90c6d7d9c9
commit
9858438121
@@ -127,12 +127,6 @@ void CommandList::eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocati
|
||||
}
|
||||
}
|
||||
|
||||
bool CommandList::isCopyOnly() const {
|
||||
const auto &hardwareInfo = device->getNEODevice()->getHardwareInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
return hwHelper.isCopyOnlyEngineType(engineGroupType);
|
||||
}
|
||||
|
||||
NEO::PreemptionMode CommandList::obtainFunctionPreemptionMode(Kernel *kernel) {
|
||||
NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor());
|
||||
return NEO::PreemptionHelper::taskPreemptionMode(device->getDevicePreemptionMode(), flags);
|
||||
|
||||
@@ -209,7 +209,9 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
void removeHostPtrAllocations();
|
||||
void eraseDeallocationContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||
void eraseResidencyContainerEntry(NEO::GraphicsAllocation *allocation);
|
||||
bool isCopyOnly() const;
|
||||
bool isCopyOnly() const {
|
||||
return NEO::EngineHelper::isCopyOnlyEngineType(engineGroupType);
|
||||
}
|
||||
bool isInternal() const {
|
||||
return internalUsage;
|
||||
}
|
||||
@@ -269,11 +271,11 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
NEO::StreamProperties requiredStreamState{};
|
||||
NEO::StreamProperties finalStreamState{};
|
||||
CommandsToPatch commandsToPatch{};
|
||||
|
||||
ze_command_list_flags_t flags = 0u;
|
||||
UnifiedMemoryControls unifiedMemoryControls;
|
||||
|
||||
ze_command_list_flags_t flags = 0u;
|
||||
NEO::EngineGroupType engineGroupType;
|
||||
|
||||
bool indirectAllocationsAllowed = false;
|
||||
bool internalUsage = false;
|
||||
bool containsCooperativeKernelsFlag = false;
|
||||
|
||||
@@ -90,12 +90,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||
clearCommandsToPatch();
|
||||
commandListSLMEnabled = false;
|
||||
|
||||
if (device->isImplicitScalingCapable() && !this->internalUsage) {
|
||||
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
|
||||
} else {
|
||||
this->partitionCount = 1;
|
||||
}
|
||||
|
||||
if (!isCopyOnly()) {
|
||||
if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) {
|
||||
programStateBaseAddress(commandContainer, false);
|
||||
@@ -120,7 +114,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
this->engineGroupType = engineGroupType;
|
||||
this->flags = flags;
|
||||
|
||||
if (device->isImplicitScalingCapable() && !this->internalUsage) {
|
||||
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
|
||||
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
|
||||
}
|
||||
|
||||
|
||||
@@ -180,8 +180,7 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
auto &hwHelper = NEO::HwHelper::get(platform.eRenderCoreFamily);
|
||||
bool isCopyOnly = hwHelper.isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType);
|
||||
bool isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType);
|
||||
|
||||
if (desc->priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW && !isCopyOnly) {
|
||||
getCsrForLowPriority(&csr);
|
||||
|
||||
@@ -46,7 +46,7 @@ class CommandListFixture : public DeviceFixture {
|
||||
std::unique_ptr<Event> event;
|
||||
};
|
||||
|
||||
template <bool createImmediate, bool createInternal>
|
||||
template <bool createImmediate, bool createInternal, bool createCopy>
|
||||
struct MultiTileCommandListFixture : public SingleRootMultiSubDeviceFixture {
|
||||
void SetUp() {
|
||||
DebugManager.flags.EnableImplicitScaling.set(1);
|
||||
@@ -55,11 +55,14 @@ struct MultiTileCommandListFixture : public SingleRootMultiSubDeviceFixture {
|
||||
|
||||
SingleRootMultiSubDeviceFixture::SetUp();
|
||||
ze_result_t returnValue;
|
||||
|
||||
NEO::EngineGroupType cmdListEngineType = createCopy ? NEO::EngineGroupType::Copy : NEO::EngineGroupType::RenderCompute;
|
||||
|
||||
if (!createImmediate) {
|
||||
commandList.reset(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
|
||||
commandList.reset(whitebox_cast(CommandList::create(productFamily, device, cmdListEngineType, 0u, returnValue)));
|
||||
} else {
|
||||
const ze_command_queue_desc_t desc = {};
|
||||
commandList.reset(whitebox_cast(CommandList::createImmediate(productFamily, device, &desc, createInternal, NEO::EngineGroupType::RenderCompute, returnValue)));
|
||||
commandList.reset(whitebox_cast(CommandList::createImmediate(productFamily, device, &desc, createInternal, cmdListEngineType, returnValue)));
|
||||
}
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
using MultiTileImmediateCommandListTest = Test<MultiTileCommandListFixture<true, false>>;
|
||||
using MultiTileImmediateCommandListTest = Test<MultiTileCommandListFixture<true, false, false>>;
|
||||
|
||||
HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImmediateCommandListThenExpectPartitionCountMatchTileCount, IsWithinXeGfxFamily) {
|
||||
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
|
||||
@@ -25,7 +25,7 @@ HWTEST2_F(MultiTileImmediateCommandListTest, GivenMultiTileDeviceWhenCreatingImm
|
||||
EXPECT_EQ(2u, commandList->partitionCount);
|
||||
}
|
||||
|
||||
using MultiTileImmediateInternalCommandListTest = Test<MultiTileCommandListFixture<true, true>>;
|
||||
using MultiTileImmediateInternalCommandListTest = Test<MultiTileCommandListFixture<true, true, false>>;
|
||||
|
||||
HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCreatingInternalImmediateCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) {
|
||||
EXPECT_EQ(2u, device->getNEODevice()->getDeviceBitfield().count());
|
||||
@@ -36,6 +36,17 @@ HWTEST2_F(MultiTileImmediateInternalCommandListTest, GivenMultiTileDeviceWhenCre
|
||||
EXPECT_EQ(1u, commandList->partitionCount);
|
||||
}
|
||||
|
||||
using MultiTileCopyEngineCommandListTest = Test<MultiTileCommandListFixture<false, false, true>>;
|
||||
|
||||
HWTEST2_F(MultiTileCopyEngineCommandListTest, GivenMultiTileDeviceWhenCreatingCopyEngineCommandListThenExpectPartitionCountEqualOne, IsWithinXeGfxFamily) {
    // The device is expected to expose two tiles, yet the copy engine
    // command list must use a single partition.
    const auto tileCount = device->getNEODevice()->getDeviceBitfield().count();
    EXPECT_EQ(2u, tileCount);
    EXPECT_EQ(1u, commandList->partitionCount);

    // Resetting the command list must succeed and leave the partition count at one.
    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->reset());
    EXPECT_EQ(1u, commandList->partitionCount);
}
|
||||
|
||||
using CommandListExecuteImmediate = Test<DeviceFixture>;
|
||||
HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlushTaskThenRequiredStreamStateIsCorrectlyReported, IsAtLeastSkl) {
|
||||
auto &hwHelper = NEO::HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
|
||||
@@ -80,7 +80,7 @@ HWTEST_F(CommandListAppendBarrier, GivenEventVsNoEventWhenAppendingBarrierThenCo
|
||||
ASSERT_LE(sizeWithoutEvent, sizeWithEvent);
|
||||
}
|
||||
|
||||
using MultiTileCommandListAppendBarrier = Test<MultiTileCommandListFixture<false, false>>;
|
||||
using MultiTileCommandListAppendBarrier = Test<MultiTileCommandListFixture<false, false, false>>;
|
||||
|
||||
HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerated, IsWithinXeGfxFamily) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
@@ -142,7 +142,7 @@ HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeContro
|
||||
EXPECT_TRUE(cmdSba->getDisableSupportForMultiGpuAtomicsForStatelessAccesses());
|
||||
}
|
||||
|
||||
using MultiTileCommandListTests = Test<MultiTileCommandListFixture<false, false>>;
|
||||
using MultiTileCommandListTests = Test<MultiTileCommandListFixture<false, false, false>>;
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListTests, givenPartitionedCommandListWhenCommandListIsCreatedThenStateBaseAddressCmdWithMultiPartialAndAtomicsCorrectlyProgrammed) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
|
||||
|
||||
@@ -137,7 +137,7 @@ HWTEST2_F(CommandListAppendLaunchKernelWithAtomics, givenKernelWithGlobalAtomics
|
||||
EXPECT_FALSE(pCommandList->commandContainer.lastSentUseGlobalAtomics);
|
||||
}
|
||||
|
||||
using MultTileCommandListAppendLaunchKernelL3Flush = Test<MultiTileCommandListFixture<false, false>>;
|
||||
using MultTileCommandListAppendLaunchKernelL3Flush = Test<MultiTileCommandListFixture<false, false, false>>;
|
||||
|
||||
HWTEST2_F(MultTileCommandListAppendLaunchKernelL3Flush, givenKernelWithRegularEventAndWithWalkerPartitionThenProperCommandsEncoded, IsXeHpCore) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
@@ -215,11 +215,10 @@ cl_command_queue_capabilities_intel ClDevice::getQueueFamilyCapabilitiesAll() {
|
||||
}
|
||||
|
||||
cl_command_queue_capabilities_intel ClDevice::getQueueFamilyCapabilities(EngineGroupType type) {
|
||||
auto &hwHelper = NEO::HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily);
|
||||
auto &clHwHelper = NEO::ClHwHelper::get(getHardwareInfo().platform.eRenderCoreFamily);
|
||||
|
||||
cl_command_queue_capabilities_intel disabledProperties = 0u;
|
||||
if (hwHelper.isCopyOnlyEngineType(type)) {
|
||||
if (EngineHelper::isCopyOnlyEngineType(type)) {
|
||||
disabledProperties |= static_cast<cl_command_queue_capabilities_intel>(CL_QUEUE_CAPABILITY_KERNEL_INTEL);
|
||||
disabledProperties |= static_cast<cl_command_queue_capabilities_intel>(CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL); // clEnqueueFillBuffer
|
||||
disabledProperties |= static_cast<cl_command_queue_capabilities_intel>(CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL); // clEnqueueCopyImage
|
||||
|
||||
@@ -951,7 +951,7 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
|
||||
const HardwareInfo &hwInfo = getDevice().getHardwareInfo();
|
||||
const HwHelper &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
const EngineGroupType engineGroupType = hwHelper.getEngineGroupType(engineType, engineUsage, hwInfo);
|
||||
const bool isEngineCopyOnly = hwHelper.isCopyOnlyEngineType(engineGroupType);
|
||||
const bool isEngineCopyOnly = EngineHelper::isCopyOnlyEngineType(engineGroupType);
|
||||
|
||||
if (isEngineCopyOnly) {
|
||||
std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr);
|
||||
|
||||
@@ -1474,4 +1474,16 @@ HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndSizeCalledThenCorrectSize
|
||||
HWTEST_F(HwHelperTest, GivenHwInfoWhenGetBatchBufferEndReferenceCalledThenCorrectPtrReturned) {
    // The helper must hand back the address of the family's cmdInitBatchBufferEnd object.
    const void *expectedReference = reinterpret_cast<const void *>(&FamilyType::cmdInitBatchBufferEnd);
    const auto &helper = HwHelper::get(renderCoreFamily);
    EXPECT_EQ(helper.getBatchBufferEndReference(), expectedReference);
}
|
||||
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenHwHelperWhenPassingCopyEngineTypeThenItsCopyOnly) {
    // The Copy engine group must be classified as copy-only.
    const bool copyOnly = EngineHelper::isCopyOnlyEngineType(EngineGroupType::Copy);
    EXPECT_TRUE(copyOnly);
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenHwHelperWhenPassingLinkedCopyEngineTypeThenItsCopyOnly) {
    // The LinkedCopy engine group must be classified as copy-only as well.
    const bool copyOnly = EngineHelper::isCopyOnlyEngineType(EngineGroupType::LinkedCopy);
    EXPECT_TRUE(copyOnly);
}
|
||||
|
||||
HWTEST_F(HwHelperTest, givenHwHelperWhenPassingComputeEngineTypeThenItsNotCopyOnly) {
    // The Compute engine group must not be classified as copy-only.
    const bool copyOnly = EngineHelper::isCopyOnlyEngineType(EngineGroupType::Compute);
    EXPECT_FALSE(copyOnly);
}
|
||||
|
||||
@@ -854,18 +854,6 @@ XE_HPC_CORETEST_F(LriHelperTestsXeHpcCore, whenProgrammingLriCommandThenExpectMm
|
||||
EXPECT_TRUE(memcmp(lri, &expectedLri, sizeof(MI_LOAD_REGISTER_IMM)) == 0);
|
||||
}
|
||||
|
||||
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperWhenPassingCopyEngineTypeThenItsCopyOnly) {
    // The helper for this render core family must report Copy as copy-only.
    const bool copyOnly = HwHelper::get(renderCoreFamily).isCopyOnlyEngineType(EngineGroupType::Copy);
    EXPECT_TRUE(copyOnly);
}
|
||||
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperWhenPassingLinkedCopyEngineTypeThenItsCopyOnly) {
    // The helper for this render core family must report LinkedCopy as copy-only.
    const bool copyOnly = HwHelper::get(renderCoreFamily).isCopyOnlyEngineType(EngineGroupType::LinkedCopy);
    EXPECT_TRUE(copyOnly);
}
|
||||
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperWhenPassingComputeEngineTypeThenItsNotCopyOnly) {
    // The helper for this render core family must not report Compute as copy-only.
    const bool copyOnly = HwHelper::get(renderCoreFamily).isCopyOnlyEngineType(EngineGroupType::Compute);
    EXPECT_FALSE(copyOnly);
}
|
||||
XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCccsDisabledWhenGetGpgpuEnginesCalledThenDontSetCccs) {
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
hwInfo.featureTable.flags.ftrCCSNode = true;
|
||||
|
||||
@@ -311,7 +311,7 @@ void Device::addEngineToEngineGroup(EngineControl &engine) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (hwHelper.isCopyOnlyEngineType(engineGroupType) && DebugManager.flags.EnableBlitterOperationsSupport.get() == 0) {
|
||||
if (EngineHelper::isCopyOnlyEngineType(engineGroupType) && DebugManager.flags.EnableBlitterOperationsSupport.get() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -20,4 +20,10 @@ enum class EngineGroupType : uint32_t {
|
||||
MaxEngineGroups
|
||||
};
|
||||
|
||||
struct EngineHelper {
|
||||
static bool isCopyOnlyEngineType(EngineGroupType type) {
|
||||
return (EngineGroupType::Copy == type || EngineGroupType::LinkedCopy == type);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -115,7 +115,6 @@ class HwHelper {
|
||||
const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0;
|
||||
virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
|
||||
virtual size_t getSipKernelMaxDbgSurfaceSize(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0;
|
||||
virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const = 0;
|
||||
@@ -344,8 +343,6 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool isCooperativeEngineSupported(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
bool isCopyOnlyEngineType(EngineGroupType type) const override;
|
||||
|
||||
bool isSipWANeeded(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
@@ -632,11 +632,6 @@ bool HwHelperHw<GfxFamily>::isCooperativeEngineSupported(const HardwareInfo &hwI
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isCopyOnlyEngineType(EngineGroupType type) const {
|
||||
return NEO::EngineGroupType::Copy == type;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isSipWANeeded(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
|
||||
@@ -377,11 +377,6 @@ int32_t HwHelperHw<Family>::getDefaultThreadArbitrationPolicy() const {
|
||||
return ThreadArbitrationPolicy::RoundRobinAfterDependency;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isCopyOnlyEngineType(EngineGroupType type) const {
|
||||
return (EngineGroupType::Copy == type || EngineGroupType::LinkedCopy == type);
|
||||
}
|
||||
|
||||
template <>
|
||||
bool HwHelperHw<Family>::isSubDeviceEngineSupported(const HardwareInfo &hwInfo, const DeviceBitfield &deviceBitfield, aub_stream::EngineType engineType) const {
|
||||
constexpr uint64_t tile1Bitfield = 0b10;
|
||||
|
||||
Reference in New Issue
Block a user