mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
Add round robin engine assign controls
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
cee7ded064
commit
f8449fb216
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2019-2021 Intel Corporation
|
* Copyright (C) 2019-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -1017,6 +1017,136 @@ HWTEST_F(EngineInstancedDeviceTests, whenCreateMultipleCommandQueuesThenEnginesA
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With round-robin assignment enabled and the engine bitfield set to 0b1101 (bit 1 cleared),
// consecutively created command queues are expected to cycle through the engines of the
// default engine group while never being bound to the engine at index 1.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignBitfieldwWenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobinSkippingNotAvailableEngines) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.set(0b1101);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceIds[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceIds, 1};
    MockContext context(deviceVector);

    // Create all queues up front; each construction consumes one round-robin step.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> queues;
    for (auto &queue : queues) {
        queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    // Locate the engine group that contains the device's default engine.
    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto groupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[groupIndex].engines;

    // 'slot' mirrors the round-robin counter: it advances once per queue and once more
    // whenever it would otherwise select the masked-out engine at index 1.
    size_t slot = 0;
    for (const auto &queue : queues) {
        if ((slot % engines.size()) == 1) {
            ++slot;
        }

        auto expectedCsr = engines[slot % engines.size()].commandStreamReceiver;
        auto csr = &queue->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
        ++slot;
    }
}
|
||||||
|
|
||||||
|
// With round-robin assignment enabled and CmdQRoundRobindEngineAssignNTo1 set to 3, every
// three consecutively created command queues are expected to share one engine of the default
// engine group before the assignment moves on to the next engine.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignNTo1wWenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobinAndNQueuesShareSameCsr) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.set(3);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceIds[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceIds, 1};
    MockContext context(deviceVector);

    // Create all queues up front; each construction consumes one round-robin step.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> queues;
    for (auto &queue : queues) {
        queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    // Locate the engine group that contains the device's default engine.
    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto groupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[groupIndex].engines;

    // The creation order of a queue divided by 3 selects the engine, wrapping around the group.
    size_t slot = 0;
    for (const auto &queue : queues) {
        auto expectedCsr = engines[(slot / 3) % engines.size()].commandStreamReceiver;
        auto csr = &queue->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
        ++slot;
    }
}
|
||||||
|
|
||||||
|
// Combines both controls: CmdQRoundRobindEngineAssignNTo1 = 3 and an engine bitfield of 0b1101
// (bit 1 cleared). Queues are expected to be assigned in groups of three per engine, with
// every step that would select the engine at index 1 being skipped.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignNTo1AndCmdQRoundRobindEngineAssignBitfieldwWenCreateMultipleCommandQueuesThenEnginesAreAssignedProperlyUsingRoundRobin) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.set(3);
    DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.set(0b1101);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceIds[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceIds, 1};
    MockContext context(deviceVector);

    // Create all queues up front; each construction consumes at least one round-robin step.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> queues;
    for (auto &queue : queues) {
        queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    // Locate the engine group that contains the device's default engine.
    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto groupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[groupIndex].engines;

    // 'slot' mirrors the round-robin counter: it keeps advancing while the computed engine
    // index is the masked-out index 1, then advances once more for the queue itself.
    size_t slot = 0;
    for (const auto &queue : queues) {
        auto engineIndex = (slot / 3) % engines.size();
        while (engineIndex == 1) {
            ++slot;
            engineIndex = (slot / 3) % engines.size();
        }

        auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        auto csr = &queue->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
        ++slot;
    }
}
|
||||||
|
|
||||||
HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignDisabledWenCreateMultipleCommandQueuesThenDefaultEngineAssigned) {
|
HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignDisabledWenCreateMultipleCommandQueuesThenDefaultEngineAssigned) {
|
||||||
constexpr uint32_t genericDevicesCount = 1;
|
constexpr uint32_t genericDevicesCount = 1;
|
||||||
constexpr uint32_t ccsCount = 4;
|
constexpr uint32_t ccsCount = 4;
|
||||||
|
|||||||
@@ -316,6 +316,8 @@ OverrideUseKmdWaitFunction = -1
|
|||||||
EnableCacheFlushAfterWalkerForAllQueues = -1
|
EnableCacheFlushAfterWalkerForAllQueues = -1
|
||||||
Force32BitDriverSupport = -1
|
Force32BitDriverSupport = -1
|
||||||
EnableCmdQRoundRobindEngineAssign = -1
|
EnableCmdQRoundRobindEngineAssign = -1
|
||||||
|
CmdQRoundRobindEngineAssignBitfield = -1
|
||||||
|
CmdQRoundRobindEngineAssignNTo1 = -1
|
||||||
EnableCmdQRoundRobindBcsEngineAssign = -1
|
EnableCmdQRoundRobindBcsEngineAssign = -1
|
||||||
EnableCmdQRoundRobindBcsEngineAssignLimit = -1
|
EnableCmdQRoundRobindBcsEngineAssignLimit = -1
|
||||||
EnableCmdQRoundRobindBcsEngineAssignStartingValue = -1
|
EnableCmdQRoundRobindBcsEngineAssignStartingValue = -1
|
||||||
|
|||||||
@@ -358,6 +358,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceUseCtxId, -1, "-1: default (disab
|
|||||||
DECLARE_DEBUG_VARIABLE(int32_t, SetKmdWaitTimeout, -1, "-1: default (infinity), >0: amount of time units for wait function timeout")
|
DECLARE_DEBUG_VARIABLE(int32_t, SetKmdWaitTimeout, -1, "-1: default (infinity), >0: amount of time units for wait function timeout")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNotifyEnableForTagUpdatePostSync, -1, "-1: default (usage determined by user fence wait call), 0: disable use of NotifyEnable flag, 1: enable use NotifyEnable flag")
|
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNotifyEnableForTagUpdatePostSync, -1, "-1: default (usage determined by user fence wait call), 0: disable use of NotifyEnable flag, 1: enable use NotifyEnable flag")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindEngineAssign, -1, "-1: default, 0: disable, 1: enable")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindEngineAssign, -1, "-1: default, 0: disable, 1: enable")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, CmdQRoundRobindEngineAssignBitfield, -1, "-1: default, >0: bitfield with supported engines")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, CmdQRoundRobindEngineAssignNTo1, -1, "-1: default, >0: assign same engine to N queues")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssign, -1, "-1: default, 0: disable, 1: enable")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssign, -1, "-1: default, 0: disable, 1: enable")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssignLimit, -1, "-1: default, >=0: round robin limit")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssignLimit, -1, "-1: default, >=0: round robin limit")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssignStartingValue, -1, "-1: default, >=0: round robin starting point")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssignStartingValue, -1, "-1: default, >=0: round robin starting point")
|
||||||
|
|||||||
@@ -577,6 +577,8 @@ EngineControl &Device::getInternalEngine() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EngineControl &Device::getNextEngineForCommandQueue() {
|
EngineControl &Device::getNextEngineForCommandQueue() {
|
||||||
|
this->initializeEngineRoundRobinControls();
|
||||||
|
|
||||||
const auto &defaultEngine = this->getDefaultEngine();
|
const auto &defaultEngine = this->getDefaultEngine();
|
||||||
|
|
||||||
const auto &hardwareInfo = this->getHardwareInfo();
|
const auto &hardwareInfo = this->getHardwareInfo();
|
||||||
@@ -586,7 +588,10 @@ EngineControl &Device::getNextEngineForCommandQueue() {
|
|||||||
const auto defaultEngineGroupIndex = this->getEngineGroupIndexFromEngineGroupType(engineGroupType);
|
const auto defaultEngineGroupIndex = this->getEngineGroupIndexFromEngineGroupType(engineGroupType);
|
||||||
auto &engineGroup = this->getRegularEngineGroups()[defaultEngineGroupIndex];
|
auto &engineGroup = this->getRegularEngineGroups()[defaultEngineGroupIndex];
|
||||||
|
|
||||||
const auto engineIndex = this->regularCommandQueuesCreatedWithinDeviceCount++ % engineGroup.engines.size();
|
auto engineIndex = 0u;
|
||||||
|
do {
|
||||||
|
engineIndex = (this->regularCommandQueuesCreatedWithinDeviceCount++ / this->queuesPerEngineCount) % engineGroup.engines.size();
|
||||||
|
} while (!this->availableEnginesForCommandQueueusRoundRobin.test(engineIndex));
|
||||||
return engineGroup.engines[engineIndex];
|
return engineGroup.engines[engineIndex];
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -648,6 +653,28 @@ void Device::finalizeRayTracing() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Device::initializeEngineRoundRobinControls() {
|
||||||
|
if (this->availableEnginesForCommandQueueusRoundRobin.any()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t queuesPerEngine = 1u;
|
||||||
|
|
||||||
|
if (DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.get() != -1) {
|
||||||
|
queuesPerEngine = DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
this->queuesPerEngineCount = queuesPerEngine;
|
||||||
|
|
||||||
|
std::bitset<8> availableEngines = std::numeric_limits<uint8_t>::max();
|
||||||
|
|
||||||
|
if (DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.get() != -1) {
|
||||||
|
availableEngines = DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
this->availableEnginesForCommandQueueusRoundRobin = availableEngines;
|
||||||
|
}
|
||||||
|
|
||||||
OSTime *Device::getOSTime() const { return getRootDeviceEnvironment().osTime.get(); };
|
OSTime *Device::getOSTime() const { return getRootDeviceEnvironment().osTime.get(); };
|
||||||
|
|
||||||
bool Device::getUuid(std::array<uint8_t, HwInfoConfig::uuidSize> &uuid) {
|
bool Device::getUuid(std::array<uint8_t, HwInfoConfig::uuidSize> &uuid) {
|
||||||
|
|||||||
@@ -184,6 +184,9 @@ class Device : public ReferenceTrackedObject<Device> {
|
|||||||
uint32_t defaultEngineIndex = 0;
|
uint32_t defaultEngineIndex = 0;
|
||||||
uint32_t numSubDevices = 0;
|
uint32_t numSubDevices = 0;
|
||||||
std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0};
|
std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0};
|
||||||
|
std::bitset<8> availableEnginesForCommandQueueusRoundRobin = 0;
|
||||||
|
uint32_t queuesPerEngineCount = 1;
|
||||||
|
void initializeEngineRoundRobinControls();
|
||||||
bool hasGenericSubDevices = false;
|
bool hasGenericSubDevices = false;
|
||||||
bool engineInstanced = false;
|
bool engineInstanced = false;
|
||||||
bool rootCsrCreated = false;
|
bool rootCsrCreated = false;
|
||||||
|
|||||||
Reference in New Issue
Block a user