From 1c68ac1cbc9945261381edbbc2baf1f050a44c5f Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Thu, 28 Oct 2021 07:52:24 +0000 Subject: [PATCH] Assign engine to command queue using round robin algorithm Signed-off-by: Lukasz Jobczyk --- opencl/source/command_queue/command_queue.cpp | 1 + opencl/source/command_queue/command_queue.h | 2 + .../source/command_queue/command_queue_hw.h | 20 ++- opencl/source/dll/CMakeLists.txt | 1 + opencl/source/dll/command_queue_dll.cpp | 14 +++ .../unit_test/device/sub_device_tests.cpp | 118 ++++++++++++++++++ .../hw_helper_tests_xehp_and_later.cpp | 5 + opencl/test/unit_test/libult/CMakeLists.txt | 1 + .../unit_test/libult/command_queue_ult.cpp | 16 +++ opencl/test/unit_test/linux/CMakeLists.txt | 1 + .../test/unit_test/linux/main_linux_dll.cpp | 5 + .../test/unit_test/test_files/igdrcl.config | 1 + .../debug_settings/debug_variables_base.inl | 1 + shared/source/device/device.cpp | 14 +++ shared/source/device/device.h | 2 + shared/source/helpers/hw_helper.h | 3 + .../helpers/hw_helper_bdw_and_later.inl | 5 + .../helpers/hw_helper_xehp_and_later.inl | 5 + shared/test/common/helpers/ult_hw_config.h | 1 + .../unit_test/base_ult_config_listener.cpp | 2 +- 20 files changed, 215 insertions(+), 3 deletions(-) create mode 100644 opencl/source/dll/command_queue_dll.cpp create mode 100644 opencl/test/unit_test/libult/command_queue_ult.cpp diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 5d074147bd..8c08657ee6 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -75,6 +75,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); gpgpuEngine = &device->getDefaultEngine(); + UNRECOVERABLE_IF(gpgpuEngine->getEngineType() >= aub_stream::EngineType::NUM_ENGINES); bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) && diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 650afbb7ab..41b904064c 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -248,6 +248,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> { void allocateHeapMemory(IndirectHeap::Type heapType, size_t minRequiredSize, IndirectHeap *&indirectHeap); + static bool isAssignEngineRoundRobinEnabled(); + MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType); void releaseVirtualEvent() { diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 02a6f4c897..1ef9d8853e 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -63,6 +63,24 @@ class CommandQueueHw : public CommandQueue { this->gpgpuEngine = &device->getInternalEngine(); } + auto &hwInfo = device->getDevice().getHardwareInfo(); + auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); + + auto assignEngineRoundRobin = + !internalUsage && + !this->queueFamilySelected && + !(clPriority & static_cast(CL_QUEUE_PRIORITY_LOW_KHR)) && + hwHelper.isAssignEngineRoundRobinSupported() && + this->isAssignEngineRoundRobinEnabled(); + + if (DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) { + assignEngineRoundRobin = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get(); + } + + if (assignEngineRoundRobin) { + this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue(); + } + if (getCmdQueueProperties(properties, CL_QUEUE_PROPERTIES) & static_cast(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) { getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch); if (DebugManager.flags.CsrDispatchMode.get() != 0) { @@ -77,8 +95,6 @@ class CommandQueueHw : public CommandQueue { auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader(); if (stateSaveAreaHeader.size() > 0) { - auto &hwInfo = device->getDevice().getHardwareInfo(); - auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface), device->getDevice(), debugSurface, 0, stateSaveAreaHeader.data(), stateSaveAreaHeader.size()); diff --git a/opencl/source/dll/CMakeLists.txt b/opencl/source/dll/CMakeLists.txt index 456234342c..f8f707bcb1 100644 --- a/opencl/source/dll/CMakeLists.txt +++ b/opencl/source/dll/CMakeLists.txt @@ -12,6 +12,7 @@ endif() set(RUNTIME_SRCS_DLL_BASE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_dll.cpp ${NEO_SHARED_DIRECTORY}/dll/create_deferred_deleter.cpp ${NEO_SHARED_DIRECTORY}/dll/create_memory_manager_${DRIVER_MODEL}.cpp ${NEO_SHARED_DIRECTORY}/dll/create_tbx_sockets.cpp diff --git a/opencl/source/dll/command_queue_dll.cpp b/opencl/source/dll/command_queue_dll.cpp new file mode 100644 index 0000000000..d556f2ff00 --- /dev/null +++ b/opencl/source/dll/command_queue_dll.cpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2020-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/command_queue/command_queue.h" + +namespace NEO { +bool CommandQueue::isAssignEngineRoundRobinEnabled() { + return true; +} +} // namespace NEO \ No newline at end of file diff --git a/opencl/test/unit_test/device/sub_device_tests.cpp b/opencl/test/unit_test/device/sub_device_tests.cpp index 8fff762574..79cf90af30 100644 --- a/opencl/test/unit_test/device/sub_device_tests.cpp +++ b/opencl/test/unit_test/device/sub_device_tests.cpp @@ -19,6 +19,7 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" +#include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "opencl/test/unit_test/mocks/mock_platform.h" #include "opencl/test/unit_test/mocks/mock_program.h" @@ -934,6 +935,123 @@ HWTEST_F(EngineInstancedDeviceTests, givenEngineInstancedDeviceWhenCreatingProgr EXPECT_EQ(clSubSubDevice1, associatedSubDevices[1]); } +HWTEST_F(EngineInstancedDeviceTests, whenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobin) { + constexpr uint32_t genericDevicesCount = 1; + constexpr uint32_t ccsCount = 4; + + VariableBackup backup(&ultHwConfig); + ultHwConfig.useRoundRobindEngineAssign = true; + + if (!createDevices(genericDevicesCount, ccsCount)) { + GTEST_SKIP(); + } + + auto &hwInfo = rootDevice->getHardwareInfo(); + EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled); + + auto clRootDevice = std::make_unique(*rootDevice, nullptr); + cl_device_id device_ids[] = {clRootDevice.get()}; + ClDeviceVector deviceVector{device_ids, 1}; + MockContext context(deviceVector); + + std::array>, 24> cmdQs; + for (auto &cmdQ : cmdQs) { + cmdQ = std::make_unique>(&context, clRootDevice.get(), nullptr); + } + + const auto &defaultEngine = clRootDevice->getDefaultEngine(); + const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); + const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo); + + auto defaultEngineGroupIndex = clRootDevice->getDevice().getIndexOfNonEmptyEngineGroup(engineGroupType); + auto engines = clRootDevice->getDevice().getEngineGroups()[defaultEngineGroupIndex]; + + for (size_t i = 0; i < cmdQs.size(); i++) { + auto engineIndex = i % engines.size(); + auto expectedCsr = engines[engineIndex].commandStreamReceiver; + auto csr = &cmdQs[i]->getGpgpuCommandStreamReceiver(); + + EXPECT_EQ(csr, expectedCsr); + } +} + +HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignEnabledWhenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobin) { + constexpr uint32_t genericDevicesCount = 1; + constexpr uint32_t ccsCount = 4; + + DebugManagerStateRestore restorer; + DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1); + + if (!createDevices(genericDevicesCount, ccsCount)) { + GTEST_SKIP(); + } + + auto &hwInfo = rootDevice->getHardwareInfo(); + EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled); + + auto clRootDevice = std::make_unique(*rootDevice, nullptr); + cl_device_id device_ids[] = {clRootDevice.get()}; + ClDeviceVector deviceVector{device_ids, 1}; + MockContext context(deviceVector); + + std::array>, 24> cmdQs; + for (auto &cmdQ : cmdQs) { + cmdQ = std::make_unique>(&context, clRootDevice.get(), nullptr); + } + + const auto &defaultEngine = clRootDevice->getDefaultEngine(); + const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); + const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo); + + auto defaultEngineGroupIndex = clRootDevice->getDevice().getIndexOfNonEmptyEngineGroup(engineGroupType); + auto engines = clRootDevice->getDevice().getEngineGroups()[defaultEngineGroupIndex]; + + for (size_t i = 0; i < cmdQs.size(); i++) { + auto engineIndex = i % engines.size(); + auto expectedCsr = engines[engineIndex].commandStreamReceiver; + auto csr = &cmdQs[i]->getGpgpuCommandStreamReceiver(); + + EXPECT_EQ(csr, expectedCsr); + } +} + +HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignDisabledWenCreateMultipleCommandQueuesThenDefaultEngineAssigned) { + constexpr uint32_t genericDevicesCount = 1; + constexpr uint32_t ccsCount = 4; + + DebugManagerStateRestore restorer; + DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(0); + + VariableBackup backup(&ultHwConfig); + ultHwConfig.useRoundRobindEngineAssign = true; + + if (!createDevices(genericDevicesCount, ccsCount)) { + GTEST_SKIP(); + } + + auto &hwInfo = rootDevice->getHardwareInfo(); + EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled); + + auto clRootDevice = std::make_unique(*rootDevice, nullptr); + cl_device_id device_ids[] = {clRootDevice.get()}; + ClDeviceVector deviceVector{device_ids, 1}; + MockContext context(deviceVector); + + std::array>, 24> cmdQs; + for (auto &cmdQ : cmdQs) { + cmdQ = std::make_unique>(&context, clRootDevice.get(), nullptr); + } + + const auto &defaultEngine = clRootDevice->getDefaultEngine(); + + for (auto &cmdQ : cmdQs) { + auto expectedCsr = defaultEngine.commandStreamReceiver; + auto csr = &cmdQ->getGpgpuCommandStreamReceiver(); + + EXPECT_EQ(csr, expectedCsr); + } +} + TEST(SubDevicesTest, whenInitializeRootCsrThenDirectSubmissionIsNotInitialized) { auto device = std::make_unique(); device->initializeRootCommandStreamReceiver(); diff --git a/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp b/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp index f1a42384a3..1689ade059 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp @@ -84,6 +84,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatform EXPECT_TRUE(hwHelper.timestampPacketWriteSupported()); } +HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformWhenCheckAssignEngineRoundRobinSupportedThenReturnTrue) { + auto &hwHelper = HwHelperHw::get(); + EXPECT_TRUE(hwHelper.isAssignEngineRoundRobinSupported()); +} + HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenAllFlagsSetWhenGetGpgpuEnginesThenReturnThreeRcsEnginesFourCcsEnginesAndOneBcsEngine) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.featureTable.ftrCCSNode = true; diff --git a/opencl/test/unit_test/libult/CMakeLists.txt b/opencl/test/unit_test/libult/CMakeLists.txt index fa0c860adb..a394bdd453 100644 --- a/opencl/test/unit_test/libult/CMakeLists.txt +++ b/opencl/test/unit_test/libult/CMakeLists.txt @@ -34,6 +34,7 @@ add_library(igdrcl_libult OBJECT EXCLUDE_FROM_ALL ) set(IGDRCL_SRCS_LIB_ULT_ENV + ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_ult.cpp ${NEO_SOURCE_DIR}/shared/test/common/helpers/custom_event_listener.h ${NEO_SOURCE_DIR}/opencl/test/unit_test/main.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/command_queue_fixture.cpp diff --git a/opencl/test/unit_test/libult/command_queue_ult.cpp b/opencl/test/unit_test/libult/command_queue_ult.cpp new file mode 100644 index 0000000000..e266f67116 --- /dev/null +++ b/opencl/test/unit_test/libult/command_queue_ult.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2020-2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/common/helpers/ult_hw_config.h" + +#include "opencl/source/command_queue/command_queue.h" + +namespace NEO { +bool CommandQueue::isAssignEngineRoundRobinEnabled() { + return ultHwConfig.useRoundRobindEngineAssign; +} +} // namespace NEO \ No newline at end of file diff --git a/opencl/test/unit_test/linux/CMakeLists.txt b/opencl/test/unit_test/linux/CMakeLists.txt index 5c3bacec96..000429960b 100644 --- a/opencl/test/unit_test/linux/CMakeLists.txt +++ b/opencl/test/unit_test/linux/CMakeLists.txt @@ -27,6 +27,7 @@ add_executable(igdrcl_${target_name} ${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp ${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp + ${NEO_SOURCE_DIR}/opencl/source/dll/command_queue_dll.cpp ${NEO_SOURCE_DIR}/opencl/source/os_interface/linux/platform_teardown_linux.cpp ${NEO_SOURCE_DIR}/opencl/test/unit_test/linux${BRANCH_DIR_SUFFIX}drm_other_requests.cpp ) diff --git a/opencl/test/unit_test/linux/main_linux_dll.cpp b/opencl/test/unit_test/linux/main_linux_dll.cpp index adad5bb5d1..1e4913517b 100644 --- a/opencl/test/unit_test/linux/main_linux_dll.cpp +++ b/opencl/test/unit_test/linux/main_linux_dll.cpp @@ -22,6 +22,7 @@ #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/linux/device_command_stream_fixture.h" +#include "opencl/source/command_queue/command_queue.h" #include "opencl/source/platform/platform.h" #include "opencl/test/unit_test/linux/drm_wrap.h" #include "opencl/test/unit_test/linux/mock_os_layer.h" @@ -802,6 +803,10 @@ TEST(DirectSubmissionControllerTest, whenCheckDirectSubmissionControllerSupportT EXPECT_TRUE(DirectSubmissionController::isSupported()); } +TEST(CommandQueueTest, whenCheckEngineRoundRobinAssignThenReturnsTrue) { + EXPECT_TRUE(CommandQueue::isAssignEngineRoundRobinEnabled()); +} + TEST(PlatformsDestructor, whenGlobalPlatformsDestructorIsCalledThenGlobalPlatformsAreDestroyed) { EXPECT_NE(nullptr, platformsImpl); platformsDestructor(); diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index b3049dd187..a9fcd76ed0 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -313,6 +313,7 @@ OverrideNotifyEnableForTagUpdatePostSync = -1 OverrideUseKmdWaitFunction = -1 EnableCacheFlushAfterWalkerForAllQueues = -1 Force32BitDriverSupport = -1 +EnableCmdQRoundRobindEngineAssign = -1 OverrideCmdQueueSynchronousMode = -1 UseAtomicsForSelfCleanupSection = -1 HBMSizePerTileInGigabytes = 0 diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 4fd0a7560d..9b04d37fb2 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -307,6 +307,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceForCompletionWait, -1, "-1: defau DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceUseCtxId, -1, "-1: default (disabled), 0: disable, 1: enable : Use Context Id in Wait User Fence when waiting for completion tag") DECLARE_DEBUG_VARIABLE(int32_t, SetKmdWaitTimeout, -1, "-1: default (infinity), >0: amount of time units for wait function timeout") DECLARE_DEBUG_VARIABLE(int32_t, OverrideNotifyEnableForTagUpdatePostSync, -1, "-1: default (usage determined by user fence wait call), 0: disable use of NotifyEnable flag, 1: enable use NotifyEnable flag") +DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindEngineAssign, -1, "-1: default, 0: disable, 1: enable") DECLARE_DEBUG_VARIABLE(int32_t, Force32BitDriverSupport, -1, "-1: default, 0: disable, 1: enable, Forces the driver to support 32 bit.") DECLARE_DEBUG_VARIABLE(int32_t, OverrideSystolicPipelineSelect, -1, "set SYSTOLIC MODE ENABLE in PIPELINE_SELECT cmd, -1:default, 0:disable, 1:enable") DECLARE_DEBUG_VARIABLE(int32_t, OverrideSystolicInComputeWalker, -1, "set SYSTOLIC MODE ENABLE in COMPUTE_WALKER cmd, -1:default, 0:disable, 1:enable") diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 79be1379d5..7d927ec011 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -565,6 +565,20 @@ EngineControl &Device::getInternalEngine() { return this->getNearestGenericSubDevice(0)->getEngine(engineType, EngineUsage::Internal); } +EngineControl &Device::getNextEngineForCommandQueue() { + const auto &defaultEngine = this->getDefaultEngine(); + + const auto &hardwareInfo = this->getHardwareInfo(); + const auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hardwareInfo); + + auto defaultEngineGroupIndex = this->getIndexOfNonEmptyEngineGroup(engineGroupType); + auto engines = this->getEngineGroups()[defaultEngineGroupIndex]; + + auto engineIndex = this->regularCommandQueuesCreatedWithinDeviceCount++ % engines.size(); + return this->getEngineGroups()[defaultEngineGroupIndex][engineIndex]; +} + EngineControl *Device::getInternalCopyEngine() { if (!getHardwareInfo().capabilityTable.blitterOperationsSupported) { return nullptr; diff --git a/shared/source/device/device.h b/shared/source/device/device.h index e7c0b04714..8812b85d08 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -65,6 +65,7 @@ class Device : public ReferenceTrackedObject { size_t getIndexOfNonEmptyEngineGroup(EngineGroupType engineGroupType) const; EngineControl &getEngine(uint32_t index); EngineControl &getDefaultEngine(); + EngineControl &getNextEngineForCommandQueue(); EngineControl &getInternalEngine(); EngineControl *getInternalCopyEngine(); SelectorCopyEngine &getSelectorCopyEngine(); @@ -172,6 +173,7 @@ class Device : public ReferenceTrackedObject { aub_stream::EngineType engineInstancedType = aub_stream::EngineType::NUM_ENGINES; uint32_t defaultEngineIndex = 0; uint32_t numSubDevices = 0; + std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0}; bool hasGenericSubDevices = false; bool engineInstanced = false; bool rootCsrCreated = false; diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 954d62a37b..420d348443 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -108,6 +108,7 @@ class HwHelper { virtual bool useOnlyGlobalTimestamps() const = 0; virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0; virtual bool packedFormatsSupported() const = 0; + virtual bool isAssignEngineRoundRobinSupported() const = 0; virtual bool isRcsAvailable(const HardwareInfo &hwInfo) const = 0; virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const = 0; virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType, @@ -357,6 +358,8 @@ class HwHelperHw : public HwHelper { bool additionalPipeControlArgsRequired() const override; + bool isAssignEngineRoundRobinSupported() const override; + bool isEngineTypeRemappingToHwSpecificRequired() const override; bool isSipKernelAsHexadecimalArrayPreferred() const override; diff --git a/shared/source/helpers/hw_helper_bdw_and_later.inl b/shared/source/helpers/hw_helper_bdw_and_later.inl index 3e94ae232b..1d08fad811 100644 --- a/shared/source/helpers/hw_helper_bdw_and_later.inl +++ b/shared/source/helpers/hw_helper_bdw_and_later.inl @@ -40,6 +40,11 @@ bool HwHelperHw::timestampPacketWriteSupported() const { return false; } +template +bool HwHelperHw::isAssignEngineRoundRobinSupported() const { + return false; +} + template const EngineInstancesContainer HwHelperHw::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const { return { diff --git a/shared/source/helpers/hw_helper_xehp_and_later.inl b/shared/source/helpers/hw_helper_xehp_and_later.inl index 21133ba2b4..0c29c6cfa2 100644 --- a/shared/source/helpers/hw_helper_xehp_and_later.inl +++ b/shared/source/helpers/hw_helper_xehp_and_later.inl @@ -133,6 +133,11 @@ uint32_t HwHelperHw::getPlanarYuvMaxHeight() const { return planarYuvMaxHeight; } +template +bool HwHelperHw::isAssignEngineRoundRobinSupported() const { + return true; +} + template aub_stream::MMIOList HwHelperHw::getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const { aub_stream::MMIOList mmioList; diff --git a/shared/test/common/helpers/ult_hw_config.h b/shared/test/common/helpers/ult_hw_config.h index 328c9e3080..b697d13565 100644 --- a/shared/test/common/helpers/ult_hw_config.h +++ b/shared/test/common/helpers/ult_hw_config.h @@ -12,6 +12,7 @@ struct UltHwConfig { bool useHwCsr = false; bool useMockedPrepareDeviceEnvironmentsFunc = true; bool forceOsAgnosticMemoryManager = true; + bool useRoundRobindEngineAssign = false; bool csrFailInitDirectSubmission = false; bool csrBaseCallDirectSubmissionAvailable = false; diff --git a/shared/test/unit_test/base_ult_config_listener.cpp b/shared/test/unit_test/base_ult_config_listener.cpp index 96182fce51..3a42bb0271 100644 --- a/shared/test/unit_test/base_ult_config_listener.cpp +++ b/shared/test/unit_test/base_ult_config_listener.cpp @@ -32,7 +32,7 @@ void NEO::BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) { // Ensure that global state is restored UltHwConfig expectedState{}; - static_assert(sizeof(UltHwConfig) == 11 * sizeof(bool), ""); // Ensure that there is no internal padding + static_assert(sizeof(UltHwConfig) == 12 * sizeof(bool), ""); // Ensure that there is no internal padding EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig))); EXPECT_EQ(0, memcmp(&referencedHwInfo, defaultHwInfo.get(), sizeof(HardwareInfo))); }