Add round robin engine assign controls

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2022-03-11 13:56:59 +00:00
committed by Compute-Runtime-Automation
parent cee7ded064
commit f8449fb216
5 changed files with 166 additions and 2 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -1017,6 +1017,136 @@ HWTEST_F(EngineInstancedDeviceTests, whenCreateMultipleCommandQueuesThenEnginesA
}
}
// Round-robin assignment with an engine bitfield: CmdQRoundRobindEngineAssignBitfield is set to
// 0b1101 (bit 1 cleared), so consecutive command queues are expected to cycle over the engines of
// the default engine group while never landing on engine index 1.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignBitfieldwWenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobinSkippingNotAvailableEngines) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    // Engine index whose bit is cleared in the 0b1101 mask configured below.
    constexpr size_t disabledEngineIndex = 1;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.set(0b1101);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    // Single-device context built on top of the root device.
    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceHandles[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceHandles, 1};
    MockContext context(deviceVector);

    // Queues are created in index order; the creation order is what the expectations below rely on.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (size_t queueIndex = 0; queueIndex < cmdQs.size(); queueIndex++) {
        cmdQs[queueIndex] = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    // Locate the engine group that contains the default engine.
    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);
    auto defaultEngineGroupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[defaultEngineGroupIndex].engines;

    // roundRobinPosition mirrors the device-side creation counter: it advances once per queue and
    // once more whenever it would otherwise select the disabled engine.
    size_t roundRobinPosition = 0;
    for (size_t queueIndex = 0; queueIndex < cmdQs.size(); queueIndex++) {
        if ((roundRobinPosition % engines.size()) == disabledEngineIndex) {
            roundRobinPosition++;
        }

        auto expectedCsr = engines[roundRobinPosition % engines.size()].commandStreamReceiver;
        auto csr = &cmdQs[queueIndex]->getGpgpuCommandStreamReceiver();
        EXPECT_EQ(csr, expectedCsr);

        roundRobinPosition++;
    }
}
// Round-robin assignment with N-to-1 sharing: CmdQRoundRobindEngineAssignNTo1 is set to 3, so every
// three consecutively created command queues are expected to share one command stream receiver
// before the assignment moves on to the next engine of the default engine group.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignNTo1wWenCreateMultipleCommandQueuesThenEnginesAreAssignedUsingRoundRobinAndNQueuesShareSameCsr) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    // Matches the value passed to CmdQRoundRobindEngineAssignNTo1 below.
    constexpr size_t queuesPerEngine = 3;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.set(3);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    // Single-device context built on top of the root device.
    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceHandles[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceHandles, 1};
    MockContext context(deviceVector);

    // Queues are created in index order; the creation order is what the expectations below rely on.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (size_t queueIndex = 0; queueIndex < cmdQs.size(); queueIndex++) {
        cmdQs[queueIndex] = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    // Locate the engine group that contains the default engine.
    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);
    auto defaultEngineGroupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[defaultEngineGroupIndex].engines;

    // No engine is masked out here, so the creation order alone determines the expected engine.
    for (size_t queueIndex = 0; queueIndex < cmdQs.size(); queueIndex++) {
        const auto engineIndex = (queueIndex / queuesPerEngine) % engines.size();

        auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        auto csr = &cmdQs[queueIndex]->getGpgpuCommandStreamReceiver();
        EXPECT_EQ(csr, expectedCsr);
    }
}
// Combination of both controls: three queues per engine (CmdQRoundRobindEngineAssignNTo1 = 3) and
// an engine mask of 0b1101 (bit 1 cleared). Queues are expected to be grouped in threes and the
// whole slot belonging to engine index 1 is expected to be skipped.
HWTEST_F(EngineInstancedDeviceTests, givenCmdQRoundRobindEngineAssignNTo1AndCmdQRoundRobindEngineAssignBitfieldwWenCreateMultipleCommandQueuesThenEnginesAreAssignedProperlyUsingRoundRobin) {
    constexpr uint32_t genericDevicesCount = 1;
    constexpr uint32_t ccsCount = 4;
    // Matches the value passed to CmdQRoundRobindEngineAssignNTo1 below.
    constexpr size_t queuesPerEngine = 3;
    // Engine index whose bit is cleared in the 0b1101 mask configured below.
    constexpr size_t disabledEngineIndex = 1;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableCmdQRoundRobindEngineAssign.set(1);
    DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.set(3);
    DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.set(0b1101);

    if (!createDevices(genericDevicesCount, ccsCount)) {
        GTEST_SKIP();
    }

    auto &hwInfo = rootDevice->getHardwareInfo();
    EXPECT_EQ(ccsCount, hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled);

    // Single-device context built on top of the root device.
    auto clRootDevice = std::make_unique<ClDevice>(*rootDevice, nullptr);
    cl_device_id deviceHandles[] = {clRootDevice.get()};
    ClDeviceVector deviceVector{deviceHandles, 1};
    MockContext context(deviceVector);

    // Queues are created in index order; the creation order is what the expectations below rely on.
    std::array<std::unique_ptr<MockCommandQueueHw<FamilyType>>, 24> cmdQs;
    for (size_t queueIndex = 0; queueIndex < cmdQs.size(); queueIndex++) {
        cmdQs[queueIndex] = std::make_unique<MockCommandQueueHw<FamilyType>>(&context, clRootDevice.get(), nullptr);
    }

    // Locate the engine group that contains the default engine.
    const auto &defaultEngine = clRootDevice->getDefaultEngine();
    const auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    const auto engineGroupType = hwHelper.getEngineGroupType(defaultEngine.getEngineType(), defaultEngine.getEngineUsage(), hwInfo);
    auto defaultEngineGroupIndex = clRootDevice->getDevice().getEngineGroupIndexFromEngineGroupType(engineGroupType);
    auto engines = clRootDevice->getDevice().getRegularEngineGroups()[defaultEngineGroupIndex].engines;

    // roundRobinPosition mirrors the device-side creation counter: it advances once per queue and
    // keeps advancing while the position still maps onto the disabled engine.
    size_t roundRobinPosition = 0;
    for (size_t queueIndex = 0; queueIndex < cmdQs.size(); queueIndex++) {
        while (((roundRobinPosition / queuesPerEngine) % engines.size()) == disabledEngineIndex) {
            roundRobinPosition++;
        }

        const auto engineIndex = (roundRobinPosition / queuesPerEngine) % engines.size();
        auto expectedCsr = engines[engineIndex].commandStreamReceiver;
        auto csr = &cmdQs[queueIndex]->getGpgpuCommandStreamReceiver();
        EXPECT_EQ(csr, expectedCsr);

        roundRobinPosition++;
    }
}
HWTEST_F(EngineInstancedDeviceTests, givenEnableCmdQRoundRobindEngineAssignDisabledWenCreateMultipleCommandQueuesThenDefaultEngineAssigned) {
constexpr uint32_t genericDevicesCount = 1;
constexpr uint32_t ccsCount = 4;

View File

@ -316,6 +316,8 @@ OverrideUseKmdWaitFunction = -1
EnableCacheFlushAfterWalkerForAllQueues = -1
Force32BitDriverSupport = -1
EnableCmdQRoundRobindEngineAssign = -1
CmdQRoundRobindEngineAssignBitfield = -1
CmdQRoundRobindEngineAssignNTo1 = -1
EnableCmdQRoundRobindBcsEngineAssign = -1
EnableCmdQRoundRobindBcsEngineAssignLimit = -1
EnableCmdQRoundRobindBcsEngineAssignStartingValue = -1

View File

@ -358,6 +358,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableUserFenceUseCtxId, -1, "-1: default (disab
DECLARE_DEBUG_VARIABLE(int32_t, SetKmdWaitTimeout, -1, "-1: default (infinity), >0: amount of time units for wait function timeout")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNotifyEnableForTagUpdatePostSync, -1, "-1: default (usage determined by user fence wait call), 0: disable use of NotifyEnable flag, 1: enable use NotifyEnable flag")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindEngineAssign, -1, "-1: default, 0: disable, 1: enable")
// Read by Device::initializeEngineRoundRobinControls(): bit i set means engine index i of the default
// engine group may be picked by Device::getNextEngineForCommandQueue(). The value is stored in a std::bitset<8>.
DECLARE_DEBUG_VARIABLE(int32_t, CmdQRoundRobindEngineAssignBitfield, -1, "-1: default, >0: bitfield with supported engines")
// Read by Device::initializeEngineRoundRobinControls(): number of consecutively created command queues that
// are mapped to the same engine before Device::getNextEngineForCommandQueue() moves on to the next one.
DECLARE_DEBUG_VARIABLE(int32_t, CmdQRoundRobindEngineAssignNTo1, -1, "-1: default, >0: assign same engine to N queues")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssign, -1, "-1: default, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssignLimit, -1, "-1: default, >=0: round robin limit")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCmdQRoundRobindBcsEngineAssignStartingValue, -1, "-1: default, >=0: round robin starting point")

View File

@ -577,6 +577,8 @@ EngineControl &Device::getInternalEngine() {
}
EngineControl &Device::getNextEngineForCommandQueue() {
this->initializeEngineRoundRobinControls();
const auto &defaultEngine = this->getDefaultEngine();
const auto &hardwareInfo = this->getHardwareInfo();
@ -586,7 +588,10 @@ EngineControl &Device::getNextEngineForCommandQueue() {
const auto defaultEngineGroupIndex = this->getEngineGroupIndexFromEngineGroupType(engineGroupType);
auto &engineGroup = this->getRegularEngineGroups()[defaultEngineGroupIndex];
const auto engineIndex = this->regularCommandQueuesCreatedWithinDeviceCount++ % engineGroup.engines.size();
auto engineIndex = 0u;
do {
engineIndex = (this->regularCommandQueuesCreatedWithinDeviceCount++ / this->queuesPerEngineCount) % engineGroup.engines.size();
} while (!this->availableEnginesForCommandQueueusRoundRobin.test(engineIndex));
return engineGroup.engines[engineIndex];
}
@ -648,6 +653,28 @@ void Device::finalizeRayTracing() {
}
}
// Sets up the two controls consumed by getNextEngineForCommandQueue():
//  - queuesPerEngineCount: how many consecutively created command queues share one engine
//    (CmdQRoundRobindEngineAssignNTo1, default 1),
//  - availableEnginesForCommandQueueusRoundRobin: mask of engine indices that may be selected
//    (CmdQRoundRobindEngineAssignBitfield, default all 8 bits set).
// Initialization runs until the mask is non-empty; afterwards the call is a no-op.
//
// Only the documented override values (> 0) are honoured. Anything else falls back to the default,
// because getNextEngineForCommandQueue() divides by queuesPerEngineCount and loops until it hits an
// engine index whose bit is set in the mask: a count of 0 would divide by zero and an empty mask
// would make that loop spin forever.
// NOTE(review): a mask whose set bits all lie at or above the engine count of the default engine
// group still cannot be satisfied by getNextEngineForCommandQueue(); the engine count is not known
// here, so that case is not handled in this function.
void Device::initializeEngineRoundRobinControls() {
    // A non-empty mask is the marker for "already initialized".
    if (this->availableEnginesForCommandQueueusRoundRobin.any()) {
        return;
    }

    uint32_t queuesPerEngine = 1u;
    const auto queuesPerEngineOverride = DebugManager.flags.CmdQRoundRobindEngineAssignNTo1.get();
    if (queuesPerEngineOverride > 0) {
        queuesPerEngine = static_cast<uint32_t>(queuesPerEngineOverride);
    }
    this->queuesPerEngineCount = queuesPerEngine;

    std::bitset<8> availableEngines = std::numeric_limits<uint8_t>::max();
    const auto availableEnginesOverride = DebugManager.flags.CmdQRoundRobindEngineAssignBitfield.get();
    if (availableEnginesOverride > 0) {
        // std::bitset<8> keeps only the low 8 bits, so a positive override such as 0x100 can still
        // yield an empty mask; keep the default in that case.
        const std::bitset<8> requestedEngines(static_cast<unsigned long long>(availableEnginesOverride));
        if (requestedEngines.any()) {
            availableEngines = requestedEngines;
        }
    }
    this->availableEnginesForCommandQueueusRoundRobin = availableEngines;
}
OSTime *Device::getOSTime() const { return getRootDeviceEnvironment().osTime.get(); };
bool Device::getUuid(std::array<uint8_t, HwInfoConfig::uuidSize> &uuid) {

View File

@ -184,6 +184,9 @@ class Device : public ReferenceTrackedObject<Device> {
uint32_t defaultEngineIndex = 0;
uint32_t numSubDevices = 0;
std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0};
// Mask of engine indices (within the default engine group) that getNextEngineForCommandQueue() may return;
// an all-zero value is what initializeEngineRoundRobinControls() treats as "not initialized yet".
std::bitset<8> availableEnginesForCommandQueueusRoundRobin = 0;
// Divisor applied to the queue-creation counter in getNextEngineForCommandQueue(), i.e. how many
// consecutively created command queues map to the same engine index.
uint32_t queuesPerEngineCount = 1;
// Fills the two round-robin members above from the CmdQRoundRobindEngineAssign* debug flags.
void initializeEngineRoundRobinControls();
bool hasGenericSubDevices = false;
bool engineInstanced = false;
bool rootCsrCreated = false;