diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index ffe0a01055..21df33fabd 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -1314,7 +1314,14 @@ TEST_F(DeviceCreateCommandQueueTest, givenLowPriorityDescAndWithoutLowPriorityCs EXPECT_THROW(device->createCommandQueue(&desc, &commandQueueHandle), std::exception); } -using MultiDeviceCreateCommandQueueTest = Test; +struct MultiDeviceCreateCommandQueueFixture : MultiDeviceFixture { + void SetUp() { + DebugManager.flags.EnableImplicitScaling = false; + MultiDeviceFixture::SetUp(); + } +}; + +using MultiDeviceCreateCommandQueueTest = Test; TEST_F(MultiDeviceCreateCommandQueueTest, givenLowPriorityDescWhenCreateCommandQueueIsCalledThenLowPriorityCsrIsAssigned) { auto device = driverHandle->devices[0]; diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp index 20f4b14ede..22eabfba09 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp @@ -576,7 +576,15 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListsWithCooperativeAndNo pCommandQueue->destroy(); } -HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListWithCooperativeKernelsWhenExecuteCommandListsIsCalledThenCorrectBatchBufferIsSubmitted, IsAtLeastXeHpCore) { +struct CommandQueueExecuteCommandListsImplicitScalingDisabled : CommandQueueExecuteCommandLists { + void SetUp() override { + DebugManager.flags.EnableImplicitScaling.set(0); + CommandQueueExecuteCommandLists::SetUp(); + } + DebugManagerStateRestore restorer{}; +}; + +HWTEST2_F(CommandQueueExecuteCommandListsImplicitScalingDisabled, givenCommandListWithCooperativeKernelsWhenExecuteCommandListsIsCalledThenCorrectBatchBufferIsSubmitted, IsAtLeastXeHpCore) { struct MockCsr : NEO::CommandStreamReceiverHw { using NEO::CommandStreamReceiverHw::CommandStreamReceiverHw; NEO::SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override { diff --git a/level_zero/core/test/unit_tests/sources/device/test_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_device.cpp index a7cb026ef5..aa6066495c 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_device.cpp @@ -2236,7 +2236,7 @@ TEST_F(MultipleDevicesTest, givenTwoSubDevicesFromTheSameRootDeviceThenCanAccess EXPECT_TRUE(canAccess); } -TEST_F(MultipleDevicesTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWithRootDeviceThenCorrectMappingIsUsedAndApiSliceIdsForSubdeviceReturned) { +TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWithRootDeviceThenCorrectMappingIsUsedAndApiSliceIdsForSubdeviceReturned) { L0::Device *device0 = driverHandle->devices[0]; auto deviceImp0 = static_cast(device0); auto hwInfo = device0->getHwInfo(); @@ -2279,7 +2279,7 @@ TEST_F(MultipleDevicesTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWi EXPECT_EQ(hwInfo.gtSystemInfo.SliceCount + 0u, sliceId); } -TEST_F(MultipleDevicesTest, givenTopologyForSingleSubdeviceWhenGettingApiSliceIdWithRootDeviceThenCorrectApiSliceIdsForFirstSubDeviceIsReturned) { +TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTopologyForSingleSubdeviceWhenGettingApiSliceIdWithRootDeviceThenCorrectApiSliceIdsForFirstSubDeviceIsReturned) { L0::Device *device0 = driverHandle->devices[0]; auto deviceImp0 = static_cast(device0); auto hwInfo = device0->getHwInfo(); diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index 615c2cda94..b4de49b593 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -1345,6 +1345,7 @@ struct ContextMemoryTests : public MemoryRelaxedSizeTests { TEST_F(ContextMemoryTests, givenMultipleSubDevicesWhenAllocatingThenUseCorrectGlobalMemorySize) { size_t allocationSize = neoDevice->getDeviceInfo().globalMemSize; + const size_t unsupportedAllocationSize = allocationSize + 1; size_t alignment = 1u; void *ptr = nullptr; @@ -1352,11 +1353,11 @@ TEST_F(ContextMemoryTests, givenMultipleSubDevicesWhenAllocatingThenUseCorrectGl ze_device_mem_alloc_desc_t deviceDesc = {}; deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; - ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, allocationSize, alignment, &ptr); + ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, unsupportedAllocationSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); - result = context->allocDeviceMem(device->toHandle(), &deviceDesc, allocationSize, alignment, &ptr); + result = context->allocDeviceMem(device->toHandle(), &deviceDesc, unsupportedAllocationSize, alignment, &ptr); EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_SIZE, result); EXPECT_EQ(nullptr, ptr); diff --git a/programmers-guide/IMPLICIT_SCALING.md b/programmers-guide/IMPLICIT_SCALING.md index b5d06ad7ef..26178b4ae6 100644 --- a/programmers-guide/IMPLICIT_SCALING.md +++ b/programmers-guide/IMPLICIT_SCALING.md @@ -32,7 +32,7 @@ To manage the resources on those sub-devices, the UMD introduces two main develo * *Implicit scaling* model, on which application allocates and submits to the root device and driver is responsible for distribution of work and memory across tiles. * *Explicit scaling* model, on which application is responsible for distributing work and memory across tiles using sub-device handles. -When doing allocations in implicit scaling mode, driver *colors* an allocation among the available tiles. Default coloring divides an allocation size evenly by the number of avaialable tiles. Other policies include dividing the allocation in chunks of a given size, which are then interleaved on each tile. +When doing allocations in implicit scaling mode, driver *colors* an allocation among the available tiles. Default coloring divides an allocation size evenly by the number of available tiles. Other policies include dividing the allocation in chunks of a given size, which are then interleaved on each tile. When scheduling a kernel for execution, driver distributes the kernel workgroups among the available tiles. Default mechanism is called *Static Partitioning*, where the workgroups are evenly distributed among tiles. For instance, in a 2-tile system, half of the workgroups go to tile 0, and the other half to tile 1. @@ -40,7 +40,7 @@ The number of CCSs, or compute engines, currently available with implicit scalin No implicit scaling support is available for BCSs. Considering that, two models are followed in terms of discovery of copy engines: -* In Level Zero, the copy engines from sub-device 0 are exposed also in the root device. This to align the engine model on both the implicit and the non-implicit-scaling scenarios. +* In Level Zero, the copy engines from sub-device 0 are exposed also in the root device. This is to align the engine model on both the implicit and the non-implicit-scaling scenarios. * In OpenCL, copy engines are not exposed in the root device. Since implicit scaling is only done for EUs, which are associated only with kernels submitted to CCS, BCSs are currently not being exposed and access to them are done through sub-device handles. @@ -76,4 +76,4 @@ For workloads with no coherent L3 caches among tiles, such as XeHP_SDV, the foll * `ForceMultiGpuAtomics`: Set to `0` to have global atomics (slow mode for multi-tile) and `1` to have atomics on L3 cache (fast mode for on tile). * Caches are flushed after every kernel. This can be disabled with `DoNotFlushCaches=1`. -* Kernels are serialized to maintain functional correctness of split execution. \ No newline at end of file +* Kernels are serialized to maintain functional correctness of split execution. diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index e06e702080..b9b619047f 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -110,6 +110,7 @@ class HwInfoConfig { virtual bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const = 0; virtual bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0; + virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0; MOCKABLE_VIRTUAL ~HwInfoConfig() = default; @@ -202,6 +203,7 @@ class HwInfoConfigHw : public HwInfoConfig { bool allowMemoryPrefetch(const HardwareInfo &hwInfo) const override; bool isBcsReportWaRequired(const HardwareInfo &hwInfo) const override; bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override; + bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override; protected: HwInfoConfigHw() = default; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 5f9a4a7447..53d53044e3 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -399,4 +399,9 @@ bool HwInfoConfigHw::isBlitCopyRequiredForLocalMemory(const Hardware (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getLocalMemoryAccessMode(hwInfo) == LocalMemoryAccessMode::CpuAccessDisallowed || !allocation.isAllocationLockable()); } + +template +bool HwInfoConfigHw::isImplicitScalingSupported(const HardwareInfo &hwInfo) const { + return false; +} } // namespace NEO diff --git a/shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl b/shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl index 094106cdce..2b4f68a151 100644 --- a/shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl +++ b/shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl @@ -134,3 +134,8 @@ template <> bool HwInfoConfigHw::isBlitterForImagesSupported() const { return true; } + +template <> +bool HwInfoConfigHw::isImplicitScalingSupported(const HardwareInfo &hwInfo) const { + return true; +} diff --git a/shared/source/xe_hpc_core/implicit_scaling_xe_hpc_core.cpp b/shared/source/xe_hpc_core/implicit_scaling_xe_hpc_core.cpp index 238fbb40c8..3c151938ed 100644 --- a/shared/source/xe_hpc_core/implicit_scaling_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/implicit_scaling_xe_hpc_core.cpp @@ -22,7 +22,7 @@ bool ImplicitScalingDispatch::platformSupportsImplicitScaling(const Hard if (ApiSpecificConfig::getApiType() == ApiSpecificConfig::ApiType::OCL) { return true; } else { - return HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B; + return HwInfoConfig::get(hwInfo.platform.eProductFamily)->isImplicitScalingSupported(hwInfo); } } diff --git a/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl b/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl index d15360057c..11f4898062 100644 --- a/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl +++ b/shared/source/xe_hpc_core/pvc/os_agnostic_hw_info_config_pvc.inl @@ -109,7 +109,7 @@ bool HwInfoConfigHw::isAdjustProgrammableIdPreferredSlmSizeRequired( template <> bool HwInfoConfigHw::isCooperativeEngineSupported(const HardwareInfo &hwInfo) const { - return (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B); + return getSteppingFromHwRevId(hwInfo) >= REVISION_B; } bool isBaseDieA0(const HardwareInfo &hwInfo) { @@ -174,3 +174,8 @@ bool HwInfoConfigHw::isBlitCopyRequiredForLocalMemory(const Hardware return false; } + +template <> +bool HwInfoConfigHw::isImplicitScalingSupported(const HardwareInfo &hwInfo) const { + return getSteppingFromHwRevId(hwInfo) >= REVISION_B; +} diff --git a/shared/test/common/xe_hp_core/test_hw_info_config_xe_hp_core.cpp b/shared/test/common/xe_hp_core/test_hw_info_config_xe_hp_core.cpp index a38157c0fb..b228cf1ca6 100644 --- a/shared/test/common/xe_hp_core/test_hw_info_config_xe_hp_core.cpp +++ b/shared/test/common/xe_hp_core/test_hw_info_config_xe_hp_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -136,3 +136,8 @@ XEHPTEST_F(TestXeHPHwInfoConfig, givenXeHpCoreWhenIsBlitterForImagesSupportedIsC EXPECT_TRUE(hwInfoConfig.isBlitterForImagesSupported()); } + +XEHPTEST_F(TestXeHPHwInfoConfig, givenHwInfoConfigWhenIsImplicitScalingSupportedThenExpectTrue) { + const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + EXPECT_TRUE(hwInfoConfig.isImplicitScalingSupported(*defaultHwInfo)); +} diff --git a/shared/test/unit_test/helpers/test_hw_info_config.cpp b/shared/test/unit_test/helpers/test_hw_info_config.cpp index 84814894cb..6af2a67684 100644 --- a/shared/test/unit_test/helpers/test_hw_info_config.cpp +++ b/shared/test/unit_test/helpers/test_hw_info_config.cpp @@ -69,3 +69,8 @@ HWTEST_F(HwInfoConfigTest, givenForceGrfNumProgrammingWithScmFlagSetWhenIsGrfNum DebugManager.flags.ForceGrfNumProgrammingWithScm.set(1); EXPECT_TRUE(hwInfoConfig.isGrfNumReportedWithScm()); } + +HWTEST2_F(HwInfoConfigTest, givenHwInfoConfigWhenIsImplicitScalingSupportedThenExpectFalse, isNotXeHpOrXeHpcCore) { + const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + EXPECT_FALSE(hwInfoConfig.isImplicitScalingSupported(*defaultHwInfo)); +} diff --git a/shared/test/unit_test/xe_hpc_core/pvc/test_hw_info_config_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/test_hw_info_config_pvc.cpp index 59ed77a75a..a393034972 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/test_hw_info_config_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/test_hw_info_config_pvc.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/helpers/constants.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/test_macros/test.h" @@ -29,3 +30,18 @@ PVCTEST_F(PVCHwInfoConfig, givenPVCRevId0WhenGettingThreadEuRatioForScratchThen8 hwInfo.platform.usRevId = 0; EXPECT_EQ(8u, hwInfoConfig.getThreadEuRatioForScratch(hwInfo)); } + +PVCTEST_F(PVCHwInfoConfig, givenPVCWithDifferentSteppingsThenImplicitScalingIsEnabledForBAndHigher) { + const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + + auto hwInfo = *defaultHwInfo; + + for (uint32_t stepping = 0; stepping < 0x10; stepping++) { + auto hwRevIdFromStepping = hwInfoConfig.getHwRevIdFromStepping(stepping, hwInfo); + if (hwRevIdFromStepping != CommonConstants::invalidStepping) { + hwInfo.platform.usRevId = hwRevIdFromStepping; + const bool shouldSupportImplicitScaling = hwRevIdFromStepping >= REVISION_B; + EXPECT_EQ(shouldSupportImplicitScaling, hwInfoConfig.isImplicitScalingSupported(hwInfo)) << "hwRevId: " << hwRevIdFromStepping; + } + } +}