mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Assign engine to command queue using round robin algorithm
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
cc2ba84fc8
commit
1c68ac1cbc
@@ -75,6 +75,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
|||||||
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||||
|
|
||||||
gpgpuEngine = &device->getDefaultEngine();
|
gpgpuEngine = &device->getDefaultEngine();
|
||||||
|
|
||||||
UNRECOVERABLE_IF(gpgpuEngine->getEngineType() >= aub_stream::EngineType::NUM_ENGINES);
|
UNRECOVERABLE_IF(gpgpuEngine->getEngineType() >= aub_stream::EngineType::NUM_ENGINES);
|
||||||
|
|
||||||
bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) &&
|
bool bcsAllowed = hwInfoConfig->isBlitterFullySupported(hwInfo) &&
|
||||||
|
|||||||
@@ -248,6 +248,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
void allocateHeapMemory(IndirectHeap::Type heapType,
|
void allocateHeapMemory(IndirectHeap::Type heapType,
|
||||||
size_t minRequiredSize, IndirectHeap *&indirectHeap);
|
size_t minRequiredSize, IndirectHeap *&indirectHeap);
|
||||||
|
|
||||||
|
static bool isAssignEngineRoundRobinEnabled();
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
|
MOCKABLE_VIRTUAL void releaseIndirectHeap(IndirectHeap::Type heapType);
|
||||||
|
|
||||||
void releaseVirtualEvent() {
|
void releaseVirtualEvent() {
|
||||||
|
|||||||
@@ -63,6 +63,24 @@ class CommandQueueHw : public CommandQueue {
|
|||||||
this->gpgpuEngine = &device->getInternalEngine();
|
this->gpgpuEngine = &device->getInternalEngine();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto &hwInfo = device->getDevice().getHardwareInfo();
|
||||||
|
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||||
|
|
||||||
|
auto assignEngineRoundRobin =
|
||||||
|
!internalUsage &&
|
||||||
|
!this->queueFamilySelected &&
|
||||||
|
!(clPriority & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) &&
|
||||||
|
hwHelper.isAssignEngineRoundRobinSupported() &&
|
||||||
|
this->isAssignEngineRoundRobinEnabled();
|
||||||
|
|
||||||
|
if (DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get() != -1) {
|
||||||
|
assignEngineRoundRobin = DebugManager.flags.EnableCmdQRoundRobindEngineAssign.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (assignEngineRoundRobin) {
|
||||||
|
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
|
||||||
|
}
|
||||||
|
|
||||||
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
|
if (getCmdQueueProperties<cl_queue_properties>(properties, CL_QUEUE_PROPERTIES) & static_cast<cl_queue_properties>(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
|
||||||
getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||||
if (DebugManager.flags.CsrDispatchMode.get() != 0) {
|
if (DebugManager.flags.CsrDispatchMode.get() != 0) {
|
||||||
@@ -77,8 +95,6 @@ class CommandQueueHw : public CommandQueue {
|
|||||||
|
|
||||||
auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader();
|
auto &stateSaveAreaHeader = SipKernel::getSipKernel(device->getDevice()).getStateSaveAreaHeader();
|
||||||
if (stateSaveAreaHeader.size() > 0) {
|
if (stateSaveAreaHeader.size() > 0) {
|
||||||
auto &hwInfo = device->getDevice().getHardwareInfo();
|
|
||||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
|
||||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
|
NEO::MemoryTransferHelper::transferMemoryToAllocation(hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo, *debugSurface),
|
||||||
device->getDevice(), debugSurface, 0, stateSaveAreaHeader.data(),
|
device->getDevice(), debugSurface, 0, stateSaveAreaHeader.data(),
|
||||||
stateSaveAreaHeader.size());
|
stateSaveAreaHeader.size());
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ endif()
|
|||||||
|
|
||||||
set(RUNTIME_SRCS_DLL_BASE
|
set(RUNTIME_SRCS_DLL_BASE
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_dll.cpp
|
||||||
${NEO_SHARED_DIRECTORY}/dll/create_deferred_deleter.cpp
|
${NEO_SHARED_DIRECTORY}/dll/create_deferred_deleter.cpp
|
||||||
${NEO_SHARED_DIRECTORY}/dll/create_memory_manager_${DRIVER_MODEL}.cpp
|
${NEO_SHARED_DIRECTORY}/dll/create_memory_manager_${DRIVER_MODEL}.cpp
|
||||||
${NEO_SHARED_DIRECTORY}/dll/create_tbx_sockets.cpp
|
${NEO_SHARED_DIRECTORY}/dll/create_tbx_sockets.cpp
|
||||||
|
|||||||
14
opencl/source/dll/command_queue_dll.cpp
Normal file
14
opencl/source/dll/command_queue_dll.cpp
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2020-2021 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "opencl/source/command_queue/command_queue.h"
|
||||||
|
|
||||||
|
namespace NEO {
|
||||||
|
bool CommandQueue::isAssignEngineRoundRobinEnabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} // namespace NEO
|
||||||
@@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
#include "opencl/source/cl_device/cl_device.h"
|
#include "opencl/source/cl_device/cl_device.h"
|
||||||
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
|
||||||
|
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||||
#include "opencl/test/unit_test/mocks/mock_platform.h"
|
#include "opencl/test/unit_test/mocks/mock_platform.h"
|
||||||
#include "opencl/test/unit_test/mocks/mock_program.h"
|
#include "opencl/test/unit_test/mocks/mock_program.h"
|
||||||
@@ -934,6 +935,123 @@ HWTEST_F(EngineInstancedDeviceTests, givenEngineInstancedDeviceWhenCreatingProgr
|
|||||||
EXPECT_EQ(clSubSubDevice1, associatedSubDevices[1]);
|
EXPECT_EQ(clSubSubDevice1, associatedSubDevices[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With round-robin assignment switched on through ultHwConfig, creates 24
// command queues on a root device exposing 4 CCS engines and checks that queue
// i uses the command stream receiver of engine (i % groupSize) from the engine
// group that contains the default engine.
HWTEST_F(EngineInstancedDeviceTests, whenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobin) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;

    // Restores ultHwConfig when the test leaves scope.
    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    ultHwConfig.useRoundRobindEngineAssign = true;

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id device_ids[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{device_ids, 1};
    MockContext context(deviceVector);

    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (auto &cmdQ : cmdQs) {
        cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    const auto defaultEngineGroupIndex = clRootDevice->getDevice().getIndexOfNonEmptyEngineGroup(engineGroupType);
    // Bind by reference: the group is only read, so there is no need to copy
    // the whole vector of engines.
    const auto &engines = clRootDevice->getDevice().getEngineGroups()[defaultEngineGroupIndex];

    for (size_t i = 0; i < cmdQs.size(); i++) {
        const auto engineIndex = i % engines.size();
        const auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        const auto csr = &cmdQs[i]->getGpgpuCommandStreamReceiver();

        // (expected, actual) order, matching the other assertions in this test.
        EXPECT_EQ(expectedCsr, csr);
    }
}
|
||||||
|
|
||||||
|
// With the EnableCmdQRoundRobindEngineAssign debug flag forced to 1 (and
// ultHwConfig left untouched), creates 24 command queues on a root device
// exposing 4 CCS engines and checks that queue i uses the command stream
// receiver of engine (i % groupSize) from the default engine's group.
HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignEnabledWhenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobin) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;

    // Restores debug flags when the test leaves scope.
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id device_ids[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{device_ids, 1};
    MockContext context(deviceVector);

    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (auto &cmdQ : cmdQs) {
        cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    const auto defaultEngineGroupIndex = clRootDevice->getDevice().getIndexOfNonEmptyEngineGroup(engineGroupType);
    // Bind by reference: the group is only read, so there is no need to copy
    // the whole vector of engines.
    const auto &engines = clRootDevice->getDevice().getEngineGroups()[defaultEngineGroupIndex];

    for (size_t i = 0; i < cmdQs.size(); i++) {
        const auto engineIndex = i % engines.size();
        const auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        const auto csr = &cmdQs[i]->getGpgpuCommandStreamReceiver();

        // (expected, actual) order, matching the other assertions in this test.
        EXPECT_EQ(expectedCsr, csr);
    }
}
|
||||||
|
|
||||||
|
// The debug flag set to 0 must win over ultHwConfig enabling round-robin:
// every one of the 24 queues is expected to use the default engine's command
// stream receiver.
HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignDisabledWenCreateMultipleCommandQueuesThenDefaultEngineAssigned) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(0);

    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    ultHwConfig.useRoundRobindEngineAssign = true;

    const bool devicesCreated = createDevices(genericDevicesCount, ccsCount);
    if (!devicesCreated) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id device_ids[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{device_ids, 1};
    MockContext context(deviceVector);

    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (size_t queueIdx = 0; queueIdx < cmdQs.size(); queueIdx++) {
        cmdQs[queueIdx] = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    // The expectation is the same for every queue, so read it once.
    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    auto expectedCsr = defaultEngine.commandStreamReceiver;

    for (size_t queueIdx = 0; queueIdx < cmdQs.size(); queueIdx++) {
        auto csr = &cmdQs[queueIdx]->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
    }
}
|
||||||
|
|
||||||
TEST(SubDevicesTest, whenInitializeRootCsrThenDirectSubmissionIsNotInitialized) {
|
TEST(SubDevicesTest, whenInitializeRootCsrThenDirectSubmissionIsNotInitialized) {
|
||||||
auto device = std::make_unique<MockDevice>();
|
auto device = std::make_unique<MockDevice>();
|
||||||
device->initializeRootCommandStreamReceiver();
|
device->initializeRootCommandStreamReceiver();
|
||||||
|
|||||||
@@ -84,6 +84,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatform
|
|||||||
EXPECT_TRUE(hwHelper.timestampPacketWriteSupported());
|
EXPECT_TRUE(hwHelper.timestampPacketWriteSupported());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The hardware helper for the tested family must report round-robin engine
// assignment as supported.
HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformWhenCheckAssignEngineRoundRobinSupportedThenReturnTrue) {
    auto &hwHelper = HwHelperHw<FamilyType>::get();
    const bool roundRobinSupported = hwHelper.isAssignEngineRoundRobinSupported();
    EXPECT_TRUE(roundRobinSupported);
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenAllFlagsSetWhenGetGpgpuEnginesThenReturnThreeRcsEnginesFourCcsEnginesAndOneBcsEngine) {
|
HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenAllFlagsSetWhenGetGpgpuEnginesThenReturnThreeRcsEnginesFourCcsEnginesAndOneBcsEngine) {
|
||||||
HardwareInfo hwInfo = *defaultHwInfo;
|
HardwareInfo hwInfo = *defaultHwInfo;
|
||||||
hwInfo.featureTable.ftrCCSNode = true;
|
hwInfo.featureTable.ftrCCSNode = true;
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ add_library(igdrcl_libult OBJECT EXCLUDE_FROM_ALL
|
|||||||
)
|
)
|
||||||
|
|
||||||
set(IGDRCL_SRCS_LIB_ULT_ENV
|
set(IGDRCL_SRCS_LIB_ULT_ENV
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_ult.cpp
|
||||||
${NEO_SOURCE_DIR}/shared/test/common/helpers/custom_event_listener.h
|
${NEO_SOURCE_DIR}/shared/test/common/helpers/custom_event_listener.h
|
||||||
${NEO_SOURCE_DIR}/opencl/test/unit_test/main.cpp
|
${NEO_SOURCE_DIR}/opencl/test/unit_test/main.cpp
|
||||||
${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/command_queue_fixture.cpp
|
${NEO_SOURCE_DIR}/opencl/test/unit_test/command_queue/command_queue_fixture.cpp
|
||||||
|
|||||||
16
opencl/test/unit_test/libult/command_queue_ult.cpp
Normal file
16
opencl/test/unit_test/libult/command_queue_ult.cpp
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2020-2021 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||||
|
|
||||||
|
#include "opencl/source/command_queue/command_queue.h"
|
||||||
|
|
||||||
|
namespace NEO {
|
||||||
|
bool CommandQueue::isAssignEngineRoundRobinEnabled() {
|
||||||
|
return ultHwConfig.useRoundRobindEngineAssign;
|
||||||
|
}
|
||||||
|
} // namespace NEO
|
||||||
@@ -27,6 +27,7 @@ add_executable(igdrcl_${target_name}
|
|||||||
${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp
|
${NEO_SHARED_DIRECTORY}/dll/linux/drm_neo_create.cpp
|
||||||
${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp
|
${NEO_SHARED_DIRECTORY}/dll/linux/options_linux.cpp
|
||||||
${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp
|
${NEO_SHARED_DIRECTORY}/dll/linux/os_interface.cpp
|
||||||
|
${NEO_SOURCE_DIR}/opencl/source/dll/command_queue_dll.cpp
|
||||||
${NEO_SOURCE_DIR}/opencl/source/os_interface/linux/platform_teardown_linux.cpp
|
${NEO_SOURCE_DIR}/opencl/source/os_interface/linux/platform_teardown_linux.cpp
|
||||||
${NEO_SOURCE_DIR}/opencl/test/unit_test/linux${BRANCH_DIR_SUFFIX}drm_other_requests.cpp
|
${NEO_SOURCE_DIR}/opencl/test/unit_test/linux${BRANCH_DIR_SUFFIX}drm_other_requests.cpp
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||||
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
|
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
|
||||||
|
|
||||||
|
#include "opencl/source/command_queue/command_queue.h"
|
||||||
#include "opencl/source/platform/platform.h"
|
#include "opencl/source/platform/platform.h"
|
||||||
#include "opencl/test/unit_test/linux/drm_wrap.h"
|
#include "opencl/test/unit_test/linux/drm_wrap.h"
|
||||||
#include "opencl/test/unit_test/linux/mock_os_layer.h"
|
#include "opencl/test/unit_test/linux/mock_os_layer.h"
|
||||||
@@ -802,6 +803,10 @@ TEST(DirectSubmissionControllerTest, whenCheckDirectSubmissionControllerSupportT
|
|||||||
EXPECT_TRUE(DirectSubmissionController::isSupported());
|
EXPECT_TRUE(DirectSubmissionController::isSupported());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The CommandQueue round-robin switch linked into this test binary is expected
// to report the feature as enabled.
TEST(CommandQueueTest, whenCheckEngineRoundRobinAssignThenReturnsTrue) {
    const bool roundRobinEnabled = CommandQueue::isAssignEngineRoundRobinEnabled();
    EXPECT_TRUE(roundRobinEnabled);
}
|
||||||
|
|
||||||
TEST(PlatformsDestructor, whenGlobalPlatformsDestructorIsCalledThenGlobalPlatformsAreDestroyed) {
|
TEST(PlatformsDestructor, whenGlobalPlatformsDestructorIsCalledThenGlobalPlatformsAreDestroyed) {
|
||||||
EXPECT_NE(nullptr, platformsImpl);
|
EXPECT_NE(nullptr, platformsImpl);
|
||||||
platformsDestructor();
|
platformsDestructor();
|
||||||
|
|||||||
@@ -313,6 +313,7 @@ OverrideNotifyEnableForTagUpdatePostSync = -1
|
|||||||
OverrideUseKmdWaitFunction = -1
|
OverrideUseKmdWaitFunction = -1
|
||||||
EnableCacheFlushAfterWalkerForAllQueues = -1
|
EnableCacheFlushAfterWalkerForAllQueues = -1
|
||||||
Force32BitDriverSupport = -1
|
Force32BitDriverSupport = -1
|
||||||
|
EnableCmdQRoundRobindEngineAssign = -1
|
||||||
OverrideCmdQueueSynchronousMode = -1
|
OverrideCmdQueueSynchronousMode = -1
|
||||||
UseAtomicsForSelfCleanupSection = -1
|
UseAtomicsForSelfCleanupSection = -1
|
||||||
HBMSizePerTileInGigabytes = 0
|
HBMSizePerTileInGigabytes = 0
|
||||||
|
|||||||
@@ -307,6 +307,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceForCompletionWait, -1, "-1: defau
|
|||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceUseCtxId, -1, "-1: default (disabled), 0: disable, 1: enable : Use Context Id in Wait User Fence when waiting for completion tag")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceUseCtxId, -1, "-1: default (disabled), 0: disable, 1: enable : Use Context Id in Wait User Fence when waiting for completion tag")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, SetKmdWaitTimeout, -1, "-1: default (infinity), >0: amount of time units for wait function timeout")
|
DECLARE_DEBUG_VARIABLE(int32_t, SetKmdWaitTimeout, -1, "-1: default (infinity), >0: amount of time units for wait function timeout")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNotifyEnableForTagUpdatePostSync, -1, "-1: default (usage determined by user fence wait call), 0: disable use of NotifyEnable flag, 1: enable use NotifyEnable flag")
|
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNotifyEnableForTagUpdatePostSync, -1, "-1: default (usage determined by user fence wait call), 0: disable use of NotifyEnable flag, 1: enable use NotifyEnable flag")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindEngineAssign, -1, "-1: default, 0: disable, 1: enable")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, Force32BitDriverSupport, -1, "-1: default, 0: disable, 1: enable, Forces the driver to support 32 bit.")
|
DECLARE_DEBUG_VARIABLE(int32_t, Force32BitDriverSupport, -1, "-1: default, 0: disable, 1: enable, Forces the driver to support 32 bit.")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSystolicPipelineSelect, -1, "set SYSTOLIC MODE ENABLE in PIPELINE_SELECT cmd, -1:default, 0:disable, 1:enable")
|
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSystolicPipelineSelect, -1, "set SYSTOLIC MODE ENABLE in PIPELINE_SELECT cmd, -1:default, 0:disable, 1:enable")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSystolicInComputeWalker, -1, "set SYSTOLIC MODE ENABLE in COMPUTE_WALKER cmd, -1:default, 0:disable, 1:enable")
|
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSystolicInComputeWalker, -1, "set SYSTOLIC MODE ENABLE in COMPUTE_WALKER cmd, -1:default, 0:disable, 1:enable")
|
||||||
|
|||||||
@@ -565,6 +565,20 @@ EngineControl &Device::getInternalEngine() {
|
|||||||
return this->getNearestGenericSubDevice(0)->getEngine(engineType, EngineUsage::Internal);
|
return this->getNearestGenericSubDevice(0)->getEngine(engineType, EngineUsage::Internal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Selects the engine for the next command queue that takes part in round-robin
// assignment.
//
// The engine group holding the default engine is located, then a per-device
// atomic counter is post-incremented and reduced modulo the group size, so
// consecutive calls walk through that group's engines in order.
//
// Returns a reference to the chosen EngineControl inside the device's engine
// groups.
EngineControl &Device::getNextEngineForCommandQueue() {
    const auto &defaultEngine = this->getDefaultEngine();

    const auto &hardwareInfo = this->getHardwareInfo();
    const auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hardwareInfo);

    const auto defaultEngineGroupIndex = this->getIndexOfNonEmptyEngineGroup(engineGroupType);
    // Bind by reference: a by-value `auto` would copy the whole engine vector
    // on every queue creation just to read its size, and the group would then
    // have to be looked up again for the return value.
    auto &engines = this->getEngineGroups()[defaultEngineGroupIndex];

    // NOTE(review): relies on the selected group being non-empty (as the
    // lookup helper's name suggests); an empty group would make the modulo
    // below undefined - confirm.
    const auto engineIndex = this->regularCommandQueuesCreatedWithinDeviceCount++ % engines.size();
    return engines[engineIndex];
}
|
||||||
|
|
||||||
EngineControl *Device::getInternalCopyEngine() {
|
EngineControl *Device::getInternalCopyEngine() {
|
||||||
if (!getHardwareInfo().capabilityTable.blitterOperationsSupported) {
|
if (!getHardwareInfo().capabilityTable.blitterOperationsSupported) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|||||||
@@ -65,6 +65,7 @@ class Device : public ReferenceTrackedObject<Device> {
|
|||||||
size_t getIndexOfNonEmptyEngineGroup(EngineGroupType engineGroupType) const;
|
size_t getIndexOfNonEmptyEngineGroup(EngineGroupType engineGroupType) const;
|
||||||
EngineControl &getEngine(uint32_t index);
|
EngineControl &getEngine(uint32_t index);
|
||||||
EngineControl &getDefaultEngine();
|
EngineControl &getDefaultEngine();
|
||||||
|
EngineControl &getNextEngineForCommandQueue();
|
||||||
EngineControl &getInternalEngine();
|
EngineControl &getInternalEngine();
|
||||||
EngineControl *getInternalCopyEngine();
|
EngineControl *getInternalCopyEngine();
|
||||||
SelectorCopyEngine &getSelectorCopyEngine();
|
SelectorCopyEngine &getSelectorCopyEngine();
|
||||||
@@ -172,6 +173,7 @@ class Device : public ReferenceTrackedObject<Device> {
|
|||||||
aub_stream::EngineType engineInstancedType = aub_stream::EngineType::NUM_ENGINES;
|
aub_stream::EngineType engineInstancedType = aub_stream::EngineType::NUM_ENGINES;
|
||||||
uint32_t defaultEngineIndex = 0;
|
uint32_t defaultEngineIndex = 0;
|
||||||
uint32_t numSubDevices = 0;
|
uint32_t numSubDevices = 0;
|
||||||
|
std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0};
|
||||||
bool hasGenericSubDevices = false;
|
bool hasGenericSubDevices = false;
|
||||||
bool engineInstanced = false;
|
bool engineInstanced = false;
|
||||||
bool rootCsrCreated = false;
|
bool rootCsrCreated = false;
|
||||||
|
|||||||
@@ -108,6 +108,7 @@ class HwHelper {
|
|||||||
virtual bool useOnlyGlobalTimestamps() const = 0;
|
virtual bool useOnlyGlobalTimestamps() const = 0;
|
||||||
virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0;
|
virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0;
|
||||||
virtual bool packedFormatsSupported() const = 0;
|
virtual bool packedFormatsSupported() const = 0;
|
||||||
|
virtual bool isAssignEngineRoundRobinSupported() const = 0;
|
||||||
virtual bool isRcsAvailable(const HardwareInfo &hwInfo) const = 0;
|
virtual bool isRcsAvailable(const HardwareInfo &hwInfo) const = 0;
|
||||||
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const = 0;
|
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const = 0;
|
||||||
virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||||
@@ -357,6 +358,8 @@ class HwHelperHw : public HwHelper {
|
|||||||
|
|
||||||
bool additionalPipeControlArgsRequired() const override;
|
bool additionalPipeControlArgsRequired() const override;
|
||||||
|
|
||||||
|
bool isAssignEngineRoundRobinSupported() const override;
|
||||||
|
|
||||||
bool isEngineTypeRemappingToHwSpecificRequired() const override;
|
bool isEngineTypeRemappingToHwSpecificRequired() const override;
|
||||||
|
|
||||||
bool isSipKernelAsHexadecimalArrayPreferred() const override;
|
bool isSipKernelAsHexadecimalArrayPreferred() const override;
|
||||||
|
|||||||
@@ -40,6 +40,11 @@ bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
bool HwHelperHw<GfxFamily>::isAssignEngineRoundRobinSupported() const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
const EngineInstancesContainer HwHelperHw<GfxFamily>::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const {
|
const EngineInstancesContainer HwHelperHw<GfxFamily>::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const {
|
||||||
return {
|
return {
|
||||||
|
|||||||
@@ -133,6 +133,11 @@ uint32_t HwHelperHw<GfxFamily>::getPlanarYuvMaxHeight() const {
|
|||||||
return planarYuvMaxHeight;
|
return planarYuvMaxHeight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename GfxFamily>
|
||||||
|
bool HwHelperHw<GfxFamily>::isAssignEngineRoundRobinSupported() const {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
aub_stream::MMIOList HwHelperHw<GfxFamily>::getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const {
|
aub_stream::MMIOList HwHelperHw<GfxFamily>::getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const {
|
||||||
aub_stream::MMIOList mmioList;
|
aub_stream::MMIOList mmioList;
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ struct UltHwConfig {
|
|||||||
bool useHwCsr = false;
|
bool useHwCsr = false;
|
||||||
bool useMockedPrepareDeviceEnvironmentsFunc = true;
|
bool useMockedPrepareDeviceEnvironmentsFunc = true;
|
||||||
bool forceOsAgnosticMemoryManager = true;
|
bool forceOsAgnosticMemoryManager = true;
|
||||||
|
bool useRoundRobindEngineAssign = false;
|
||||||
|
|
||||||
bool csrFailInitDirectSubmission = false;
|
bool csrFailInitDirectSubmission = false;
|
||||||
bool csrBaseCallDirectSubmissionAvailable = false;
|
bool csrBaseCallDirectSubmissionAvailable = false;
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ void NEO::BaseUltConfigListener::OnTestEnd(const ::testing::TestInfo &) {
|
|||||||
|
|
||||||
// Ensure that global state is restored
|
// Ensure that global state is restored
|
||||||
UltHwConfig expectedState{};
|
UltHwConfig expectedState{};
|
||||||
static_assert(sizeof(UltHwConfig) == 11 * sizeof(bool), ""); // Ensure that there is no internal padding
|
static_assert(sizeof(UltHwConfig) == 12 * sizeof(bool), ""); // Ensure that there is no internal padding
|
||||||
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
|
EXPECT_EQ(0, memcmp(&expectedState, &ultHwConfig, sizeof(UltHwConfig)));
|
||||||
EXPECT_EQ(0, memcmp(&referencedHwInfo, defaultHwInfo.get(), sizeof(HardwareInfo)));
|
EXPECT_EQ(0, memcmp(&referencedHwInfo, defaultHwInfo.get(), sizeof(HardwareInfo)));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user