Enable implicit scaling via platform config

Related-To: NEO-6819
Signed-off-by: Daniel Chabrowski <daniel.chabrowski@intel.com>
This commit is contained in:
Daniel Chabrowski
2022-05-23 17:03:53 +00:00
committed by Compute-Runtime-Automation
parent 630ecfdd09
commit b5495169ca
13 changed files with 71 additions and 12 deletions

View File

@@ -1314,7 +1314,14 @@ TEST_F(DeviceCreateCommandQueueTest, givenLowPriorityDescAndWithoutLowPriorityCs
EXPECT_THROW(device->createCommandQueue(&desc, &commandQueueHandle), std::exception);
}
using MultiDeviceCreateCommandQueueTest = Test<MultiDeviceFixture>;
// Multi-device fixture that forces the EnableImplicitScaling debug flag off
// before the base MultiDeviceFixture is set up.
struct MultiDeviceCreateCommandQueueFixture : MultiDeviceFixture {
    void SetUp() {
        // Go through the flag's set() accessor instead of assigning to the flag
        // object itself, so only the current value is overridden.
        // NOTE(review): this fixture does not restore the flag on its own - confirm
        // that MultiDeviceFixture (or the harness) restores debug flags afterwards.
        DebugManager.flags.EnableImplicitScaling.set(0);
        MultiDeviceFixture::SetUp();
    }
};
using MultiDeviceCreateCommandQueueTest = Test<MultiDeviceCreateCommandQueueFixture>;
TEST_F(MultiDeviceCreateCommandQueueTest, givenLowPriorityDescWhenCreateCommandQueueIsCalledThenLowPriorityCsrIsAssigned) {
auto device = driverHandle->devices[0];

View File

@@ -576,7 +576,15 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListsWithCooperativeAndNo
pCommandQueue->destroy();
}
HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListWithCooperativeKernelsWhenExecuteCommandListsIsCalledThenCorrectBatchBufferIsSubmitted, IsAtLeastXeHpCore) {
// CommandQueueExecuteCommandLists fixture variant that sets the
// EnableImplicitScaling debug flag to 0 before running the base set-up.
struct CommandQueueExecuteCommandListsImplicitScalingDisabled : CommandQueueExecuteCommandLists {
    // NOTE(review): presumably snapshots the debug flags when the fixture is
    // constructed and puts them back when it is destroyed - confirm.
    DebugManagerStateRestore restorer{};

    void SetUp() override {
        auto &implicitScalingFlag = DebugManager.flags.EnableImplicitScaling;
        implicitScalingFlag.set(0);

        CommandQueueExecuteCommandLists::SetUp();
    }
};
HWTEST2_F(CommandQueueExecuteCommandListsImplicitScalingDisabled, givenCommandListWithCooperativeKernelsWhenExecuteCommandListsIsCalledThenCorrectBatchBufferIsSubmitted, IsAtLeastXeHpCore) {
struct MockCsr : NEO::CommandStreamReceiverHw<FamilyType> {
using NEO::CommandStreamReceiverHw<FamilyType>::CommandStreamReceiverHw;
NEO::SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override {

View File

@@ -2236,7 +2236,7 @@ TEST_F(MultipleDevicesTest, givenTwoSubDevicesFromTheSameRootDeviceThenCanAccess
EXPECT_TRUE(canAccess);
}
TEST_F(MultipleDevicesTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWithRootDeviceThenCorrectMappingIsUsedAndApiSliceIdsForSubdeviceReturned) {
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWithRootDeviceThenCorrectMappingIsUsedAndApiSliceIdsForSubdeviceReturned) {
L0::Device *device0 = driverHandle->devices[0];
auto deviceImp0 = static_cast<DeviceImp *>(device0);
auto hwInfo = device0->getHwInfo();
@@ -2279,7 +2279,7 @@ TEST_F(MultipleDevicesTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWi
EXPECT_EQ(hwInfo.gtSystemInfo.SliceCount + 0u, sliceId);
}
TEST_F(MultipleDevicesTest, givenTopologyForSingleSubdeviceWhenGettingApiSliceIdWithRootDeviceThenCorrectApiSliceIdsForFirstSubDeviceIsReturned) {
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTopologyForSingleSubdeviceWhenGettingApiSliceIdWithRootDeviceThenCorrectApiSliceIdsForFirstSubDeviceIsReturned) {
L0::Device *device0 = driverHandle->devices[0];
auto deviceImp0 = static_cast<DeviceImp *>(device0);
auto hwInfo = device0->getHwInfo();

View File

@@ -1345,6 +1345,7 @@ struct ContextMemoryTests : public MemoryRelaxedSizeTests {
TEST_F(ContextMemoryTests, givenMultipleSubDevicesWhenAllocatingThenUseCorrectGlobalMemorySize) {
size_t allocationSize = neoDevice->getDeviceInfo().globalMemSize;
const size_t unsupportedAllocationSize = allocationSize + 1;
size_t alignment = 1u;
void *ptr = nullptr;
@@ -1352,11 +1353,11 @@ TEST_F(ContextMemoryTests, givenMultipleSubDevicesWhenAllocatingThenUseCorrectGl
ze_device_mem_alloc_desc_t deviceDesc = {};
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, allocationSize, alignment, &ptr);
ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, unsupportedAllocationSize, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result);
EXPECT_EQ(nullptr, ptr);
result = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocationSize, alignment, &ptr);
result = context->allocDeviceMem(device->toHandle(), &deviceDesc, unsupportedAllocationSize, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result);
EXPECT_EQ(nullptr, ptr);

View File

@@ -32,7 +32,7 @@ To manage the resources on those sub-devices, the UMD introduces two main develo
* *Implicit scaling* model, in which the application allocates and submits to the root device and the driver is responsible for distribution of work and memory across tiles.
* *Explicit scaling* model, in which the application is responsible for distributing work and memory across tiles using sub-device handles.
When doing allocations in implicit scaling mode, driver *colors* an allocation among the available tiles. Default coloring divides an allocation size evenly by the number of avaialable tiles. Other policies include dividing the allocation in chunks of a given size, which are then interleaved on each tile.
When doing allocations in implicit scaling mode, driver *colors* an allocation among the available tiles. Default coloring divides an allocation size evenly by the number of available tiles. Other policies include dividing the allocation in chunks of a given size, which are then interleaved on each tile.
When scheduling a kernel for execution, driver distributes the kernel workgroups among the available tiles. Default mechanism is called *Static Partitioning*, where the workgroups are evenly distributed among tiles. For instance, in a 2-tile system, half of the workgroups go to tile 0, and the other half to tile 1.
@@ -40,7 +40,7 @@ The number of CCSs, or compute engines, currently available with implicit scalin
No implicit scaling support is available for BCSs. Considering that, two models are followed in terms of discovery of copy engines:
* In Level Zero, the copy engines from sub-device 0 are exposed also in the root device. This to align the engine model on both the implicit and the non-implicit-scaling scenarios.
* In Level Zero, the copy engines from sub-device 0 are exposed also in the root device. This is to align the engine model on both the implicit and the non-implicit-scaling scenarios.
* In OpenCL, copy engines are not exposed in the root device.
Since implicit scaling is only done for EUs, which are associated only with kernels submitted to CCS, BCSs are currently not being exposed and access to them is done through sub-device handles.
@@ -76,4 +76,4 @@ For workloads with no coherent L3 caches among tiles, such as XeHP_SDV, the foll
* `ForceMultiGpuAtomics`: Set to `0` to have global atomics (slow mode for multi-tile) and `1` to have atomics on L3 cache (fast mode for on tile).
* Caches are flushed after every kernel. This can be disabled with `DoNotFlushCaches=1`.
* Kernels are serialized to maintain functional correctness of split execution.
* Kernels are serialized to maintain functional correctness of split execution.

View File

@@ -110,6 +110,7 @@ class HwInfoConfig {
virtual bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const = 0;
virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0;
virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0;
virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0;
MOCKABLE_VIRTUAL ~HwInfoConfig() = default;
@@ -202,6 +203,7 @@ class HwInfoConfigHw : public HwInfoConfig {
bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const override;
bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override;
bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override;
bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override;
protected:
HwInfoConfigHw() = default;

View File

@@ -399,4 +399,9 @@ bool HwInfoConfigHw<gfxProduct>::isBlitCopyRequiredForLocalMemory(const Hardware
(HwInfoConfig::get(hwInfo.platform.eProductFamily)->getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessDisallowed ||
!allocation.isAllocationLockable());
}
// Generic (non-specialized) answer: implicit scaling is reported as unsupported.
// hwInfo is not consulted here.
template <PRODUCT_FAMILY gfxProduct>
bool HwInfoConfigHw<gfxProduct>::isImplicitScalingSupported(const HardwareInfo &hwInfo) const {
return false;
}
} // namespace NEO

View File

@@ -134,3 +134,8 @@ template <>
bool HwInfoConfigHw<gfxProduct>::isBlitterForImagesSupported() const {
return true;
}
// Product-specific specialization: implicit scaling is always reported as
// supported, independent of the contents of hwInfo.
template <>
bool HwInfoConfigHw<gfxProduct>::isImplicitScalingSupported(const HardwareInfo &hwInfo) const {
return true;
}

View File

@@ -22,7 +22,7 @@ bool ImplicitScalingDispatch<Family>::platformSupportsImplicitScaling(const Hard
if (ApiSpecificConfig::getApiType() == ApiSpecificConfig::ApiType::OCL) {
return true;
} else {
return HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B;
return HwInfoConfig::get(hwInfo.platform.eProductFamily)->isImplicitScalingSupported(hwInfo);
}
}

View File

@@ -109,7 +109,7 @@ bool HwInfoConfigHw<gfxProduct>::isAdjustProgrammableIdPreferredSlmSizeRequired(
template <>
bool HwInfoConfigHw<gfxProduct>::isCooperativeEngineSupported(const HardwareInfo &hwInfo) const {
return (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B);
return getSteppingFromHwRevId(hwInfo) >= REVISION_B;
}
bool isBaseDieA0(const HardwareInfo &hwInfo) {
@@ -174,3 +174,8 @@ bool HwInfoConfigHw<gfxProduct>::isBlitCopyRequiredForLocalMemory(const Hardware
return false;
}
// Product-specific specialization: implicit scaling is reported as supported
// only when the stepping decoded from hwInfo is at least REVISION_B.
template <>
bool HwInfoConfigHw<gfxProduct>::isImplicitScalingSupported(const HardwareInfo &hwInfo) const {
    const auto stepping = getSteppingFromHwRevId(hwInfo);
    return stepping >= REVISION_B;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -136,3 +136,8 @@ XEHPTEST_F(TestXeHPHwInfoConfig, givenXeHpCoreWhenIsBlitterForImagesSupportedIsC
EXPECT_TRUE(hwInfoConfig.isBlitterForImagesSupported());
}
// The product config selected for the default hardware info must report
// implicit scaling as supported.
XEHPTEST_F(TestXeHPHwInfoConfig, givenHwInfoConfigWhenIsImplicitScalingSupportedThenExpectTrue) {
    const auto productFamily = defaultHwInfo->platform.eProductFamily;

    EXPECT_TRUE(HwInfoConfig::get(productFamily)->isImplicitScalingSupported(*defaultHwInfo));
}

View File

@@ -69,3 +69,8 @@ HWTEST_F(HwInfoConfigTest, givenForceGrfNumProgrammingWithScmFlagSetWhenIsGrfNum
DebugManager.flags.ForceGrfNumProgrammingWithScm.set(1);
EXPECT_TRUE(hwInfoConfig.isGrfNumReportedWithScm());
}
// For platforms matched by isNotXeHpOrXeHpcCore, the product config selected
// for the default hardware info must report implicit scaling as unsupported.
HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigWhenIsImplicitScalingSupportedThenExpectFalse, isNotXeHpOrXeHpcCore) {
    const auto &hwInfo = *defaultHwInfo;
    const bool implicitScalingSupported = HwInfoConfig::get(hwInfo.platform.eProductFamily)->isImplicitScalingSupported(hwInfo);

    EXPECT_FALSE(implicitScalingSupported);
}

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/helpers/constants.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/test_macros/test.h"
@@ -29,3 +30,18 @@ PVCTEST_F(PVCHwInfoConfig, givenPVCRevId0WhenGettingThreadEuRatioForScratchThen8
hwInfo.platform.usRevId = 0;
EXPECT_EQ(8u, hwInfoConfig.getThreadEuRatioForScratch(hwInfo));
}
// Walks steppings 0x0..0xF and, for each one that maps to a valid hardware
// revision id, checks that implicit scaling is reported as supported exactly
// when the stepping is REVISION_B or higher.
PVCTEST_F(PVCHwInfoConfig, givenPVCWithDifferentSteppingsThenImplicitScalingIsEnabledForBAndHigher) {
    const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
    auto hwInfo = *defaultHwInfo;

    for (uint32_t stepping = 0; stepping < 0x10; stepping++) {
        const auto hwRevIdFromStepping = hwInfoConfig.getHwRevIdFromStepping(stepping, hwInfo);
        if (hwRevIdFromStepping == CommonConstants::invalidStepping) {
            // This stepping has no revision id on the product - nothing to check.
            continue;
        }

        hwInfo.platform.usRevId = hwRevIdFromStepping;

        // REVISION_B is a stepping value, so the expectation is derived from the
        // stepping under test rather than from the raw hardware revision id.
        const bool shouldSupportImplicitScaling = stepping >= REVISION_B;
        EXPECT_EQ(shouldSupportImplicitScaling, hwInfoConfig.isImplicitScalingSupported(hwInfo))
            << "stepping: " << stepping << ", hwRevId: " << hwRevIdFromStepping;
    }
}