mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add round robin engine assign controls
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
cee7ded064
commit
f8449fb216
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -1017,6 +1017,136 @@ HWTEST_F(EngineInstancedDeviceTests, whenCreateMultipleCommandQueuesThenEnginesA
|
||||
}
|
||||
}
|
||||
|
||||
// Round robin with an engine mask: CmdQRoundRobindEngineAssignBitfield is set to
// 0b1101 (bit 1 cleared), and the test expects the queues to cycle through the
// default engine group without ever being assigned the engine at index 1.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignBitfieldwWenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobinSkippingNotAvailableEngines) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.set(0b1101);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceIds[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceIds, 1};
    MockContext context(deviceVector);

    // Queues are created in array order; the checks below compare the i-th queue
    // against the i-th expected round-robin slot.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (auto &queue : cmdQs) {
        queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto groupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[groupIndex].engines;

    // `slot` is the running round-robin position; it advances one extra step
    // whenever it would land on the masked-out engine index 1.
    size_t slot = 0;
    for (const auto &queue : cmdQs) {
        if ((slot % engines.size()) == 1) {
            ++slot;
        }

        auto expectedCsr = engines[slot % engines.size()].commandStreamReceiver;
        auto csr = &queue->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
        ++slot;
    }
}
|
||||
|
||||
// Round robin with N-to-1 sharing: CmdQRoundRobindEngineAssignNTo1 is set to 3,
// and the test expects every run of three consecutively created queues to use
// the same engine before the assignment moves on to the next one.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignNTo1wWenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobinAndNQueuesShareSameCsr) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    // Mirrors the value written to CmdQRoundRobindEngineAssignNTo1 below.
    constexpr size_t queuesPerEngine = 3;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.set(3);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceIds[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceIds, 1};
    MockContext context(deviceVector);

    // Queues are created in array order; the checks below compare the i-th queue
    // against the i-th expected round-robin slot.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (auto &queue : cmdQs) {
        queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto groupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[groupIndex].engines;

    size_t slot = 0;
    for (const auto &queue : cmdQs) {
        const auto engineIndex = (slot / queuesPerEngine) % engines.size();

        auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        auto csr = &queue->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
        ++slot;
    }
}
|
||||
|
||||
// Combined controls: CmdQRoundRobindEngineAssignNTo1 = 3 together with
// CmdQRoundRobindEngineAssignBitfield = 0b1101 (bit 1 cleared). The test expects
// runs of three queues per engine, with every slot that maps to engine index 1
// skipped entirely.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignNTo1AndCmdQRoundRobindEngineAssignBitfieldwWenCreateMultipleCommandQueuesThenEnginesAreAssignedProperlyUsingRoundRobin) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    // Mirrors the value written to CmdQRoundRobindEngineAssignNTo1 below.
    constexpr size_t queuesPerEngine = 3;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.set(3);
    DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.set(0b1101);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceIds[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceIds, 1};
    MockContext context(deviceVector);

    // Queues are created in array order; the checks below compare the i-th queue
    // against the i-th expected round-robin slot.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (auto &queue : cmdQs) {
        queue = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);

    auto groupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[groupIndex].engines;

    // Maps a running round-robin slot to the engine index it lands on.
    const auto engineIndexForSlot = [&engines, queuesPerEngine](size_t slot) {
        return (slot / queuesPerEngine) % engines.size();
    };

    size_t slot = 0;
    for (const auto &queue : cmdQs) {
        // Step over every slot that would resolve to the masked-out engine index 1.
        while (engineIndexForSlot(slot) == 1) {
            ++slot;
        }

        auto expectedCsr = engines[engineIndexForSlot(slot)].commandStreamReceiver;
        auto csr = &queue->getGpgpuCommandStreamReceiver();

        EXPECT_EQ(csr, expectedCsr);
        ++slot;
    }
}
|
||||
|
||||
HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignDisabledWenCreateMultipleCommandQueuesThenDefaultEngineAssigned) {
|
||||
constexpr uint32_t genericDevicesCount = 1;
|
||||
constexpr uint32_t ccsCount = 4;
|
||||
|
@ -316,6 +316,8 @@ OverrideUseKmdWaitFunction = -1
|
||||
EnableCacheFlushAfterWalkerForAllQueues = -1
|
||||
Force32BitDriverSupport = -1
|
||||
EnableCmdQRoundRobindEngineAssign = -1
|
||||
CmdQRoundRobindEngineAssignBitfield = -1
|
||||
CmdQRoundRobindEngineAssignNTo1 = -1
|
||||
EnableCmdQRoundRobindBcsEngineAssign = -1
|
||||
EnableCmdQRoundRobindBcsEngineAssignLimit = -1
|
||||
EnableCmdQRoundRobindBcsEngineAssignStartingValue = -1
|
||||
|
@ -358,6 +358,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceUseCtxId, -1, "-1: default (disab
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, SetKmdWaitTimeout, -1, "-1: default (infinity), >0: amount of time units for wait function timeout")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNotifyEnableForTagUpdatePostSync, -1, "-1: default (usage determined by user fence wait call), 0: disable use of NotifyEnable flag, 1: enable use NotifyEnable flag")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindEngineAssign, -1, "-1: default, 0: disable, 1: enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, CmdQRoundRobindEngineAssignBitfield, -1, "-1: default, >0: bitfield with supported engines")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, CmdQRoundRobindEngineAssignNTo1, -1, "-1: default, >0: assign same engine to N queues")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssign, -1, "-1: default, 0: disable, 1: enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssignLimit, -1, "-1: default, >=0: round robin limit")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssignStartingValue, -1, "-1: default, >=0: round robin starting point")
|
||||
|
@ -577,6 +577,8 @@ EngineControl &Device::getInternalEngine() {
|
||||
}
|
||||
|
||||
EngineControl &Device::getNextEngineForCommandQueue() {
|
||||
this->initializeEngineRoundRobinControls();
|
||||
|
||||
const auto &defaultEngine = this->getDefaultEngine();
|
||||
|
||||
const auto &hardwareInfo = this->getHardwareInfo();
|
||||
@ -586,7 +588,10 @@ EngineControl &Device::getNextEngineForCommandQueue() {
|
||||
const auto defaultEngineGroupIndex = this->getEngineGroupIndexFromEngineGroupType(engineGroupType);
|
||||
auto &engineGroup = this->getRegularEngineGroups()[defaultEngineGroupIndex];
|
||||
|
||||
const auto engineIndex = this->regularCommandQueuesCreatedWithinDeviceCount++ % engineGroup.engines.size();
|
||||
auto engineIndex = 0u;
|
||||
do {
|
||||
engineIndex = (this->regularCommandQueuesCreatedWithinDeviceCount++ / this->queuesPerEngineCount) % engineGroup.engines.size();
|
||||
} while (!this->availableEnginesForCommandQueueusRoundRobin.test(engineIndex));
|
||||
return engineGroup.engines[engineIndex];
|
||||
}
|
||||
|
||||
@ -648,6 +653,28 @@ void Device::finalizeRayTracing() {
|
||||
}
|
||||
}
|
||||
|
||||
void Device::initializeEngineRoundRobinControls() {
|
||||
if (this->availableEnginesForCommandQueueusRoundRobin.any()) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t queuesPerEngine = 1u;
|
||||
|
||||
if (DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.get() != -1) {
|
||||
queuesPerEngine = DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.get();
|
||||
}
|
||||
|
||||
this->queuesPerEngineCount = queuesPerEngine;
|
||||
|
||||
std::bitset<8> availableEngines = std::numeric_limits<uint8_t>::max();
|
||||
|
||||
if (DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.get() != -1) {
|
||||
availableEngines = DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.get();
|
||||
}
|
||||
|
||||
this->availableEnginesForCommandQueueusRoundRobin = availableEngines;
|
||||
}
|
||||
|
||||
OSTime *Device::getOSTime() const { return getRootDeviceEnvironment().osTime.get(); };
|
||||
|
||||
bool Device::getUuid(std::array<uint8_t, HwInfoConfig::uuidSize> &uuid) {
|
||||
|
@ -184,6 +184,9 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
uint32_t defaultEngineIndex = 0;
|
||||
uint32_t numSubDevices = 0;
|
||||
std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0};
|
||||
std::bitset<8> availableEnginesForCommandQueueusRoundRobin = 0;
|
||||
uint32_t queuesPerEngineCount = 1;
|
||||
void initializeEngineRoundRobinControls();
|
||||
bool hasGenericSubDevices = false;
|
||||
bool engineInstanced = false;
|
||||
bool rootCsrCreated = false;
|
||||
|
Reference in New Issue
Block a user