From 88cccaf32888282ca7e86e273c0b86418357bdd8 Mon Sep 17 00:00:00 2001
From: "Dunajski, Bartosz"
Date: Fri, 24 Nov 2023 08:45:33 +0000
Subject: [PATCH] feature: add support for ForceBcsEngineIndex flag 2

Related-To: NEO-8356

Signed-off-by: Dunajski, Bartosz
---
Notes (not part of the commit message; ignored by git am):
  When ForceBcsEngineIndex is not -1 and the requested ordinal maps to a
  copy-only engine group, DeviceImp::getCsrForOrdinalAndIndex() replaces the
  caller's index with the flag value. Value 0 redirects to the Copy group;
  a value N > 0 redirects to the LinkedCopy group and uses engine N - 1.
  The target group is searched in subDeviceCopyEngineGroups when the
  requested ordinal is >= numEngineGroups, otherwise in engineGroups.
  ZE_RESULT_ERROR_INVALID_ARGUMENT is returned when no such group is found.

 level_zero/core/source/device/device_imp.cpp  |  35 ++++
 .../core/test/unit_tests/mocks/mock_device.h  |   1 +
 .../xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp | 184 ++++++++++++++++++
 .../xe_hpc_core/test_device_xe_hpc_core.cpp   |  99 ++++++++++
 4 files changed, 319 insertions(+)

diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp
index a6bbc805e6..d9d22d13fb 100644
--- a/level_zero/core/source/device/device_imp.cpp
+++ b/level_zero/core/source/device/device_imp.cpp
@@ -1526,6 +1526,41 @@ ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr
         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
     }
 
+    if ((NEO::DebugManager.flags.ForceBcsEngineIndex.get() != -1) && NEO::EngineHelper::isCopyOnlyEngineType(getEngineGroupTypeForOrdinal(ordinal))) {
+        index = static_cast<uint32_t>(NEO::DebugManager.flags.ForceBcsEngineIndex.get());
+
+        constexpr uint32_t invalidOrdinal = std::numeric_limits<uint32_t>::max();
+
+        auto findOrdinal = [&](NEO::EngineGroupType type) -> uint32_t {
+            bool subDeviceCopyEngines = (ordinal >= numEngineGroups);
+            auto &lookupGroup = subDeviceCopyEngines ? this->subDeviceCopyEngineGroups : engineGroups;
+
+            uint32_t ordinal = invalidOrdinal;
+
+            for (uint32_t i = 0; i < lookupGroup.size(); i++) {
+                if (lookupGroup[i].engineGroupType == type) {
+                    ordinal = (i + (subDeviceCopyEngines ? numEngineGroups : 0));
+                    break;
+                }
+            }
+
+            return ordinal;
+        };
+
+        if (index == 0 && getEngineGroupTypeForOrdinal(ordinal) != NEO::EngineGroupType::Copy) {
+            ordinal = findOrdinal(NEO::EngineGroupType::Copy);
+        } else if (index > 0) {
+            if (getEngineGroupTypeForOrdinal(ordinal) != NEO::EngineGroupType::LinkedCopy) {
+                ordinal = findOrdinal(NEO::EngineGroupType::LinkedCopy);
+            }
+            index--;
+        }
+
+        if (ordinal == invalidOrdinal) {
+            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
+        }
+    }
+
     if (ordinal < numEngineGroups) {
         auto &engines = engineGroups[ordinal].engines;
         if (index >= engines.size()) {
diff --git a/level_zero/core/test/unit_tests/mocks/mock_device.h b/level_zero/core/test/unit_tests/mocks/mock_device.h
index e3b9c71817..7d6826e278 100644
--- a/level_zero/core/test/unit_tests/mocks/mock_device.h
+++ b/level_zero/core/test/unit_tests/mocks/mock_device.h
@@ -105,6 +105,7 @@ struct MockDeviceImp : public L0::DeviceImp {
     using Base::getNEODevice;
     using Base::implicitScalingCapable;
     using Base::neoDevice;
+    using Base::subDeviceCopyEngineGroups;
 
     MockDeviceImp(NEO::Device *device, NEO::ExecutionEnvironment *execEnv) {
         device->incRefInternal();
diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp
index 8ccdbbd8f2..1a3767924b 100644
--- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp
+++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp
@@ -117,6 +117,190 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenCommandQueueWhenExecutingCommandListsF
     commandQueue->destroy();
 }
 
+HWTEST2_F(CommandQueueCommandsXeHpc, givenDebugFlagWithLinkedEngineSetWhenCreatingCommandQueueThenOverrideEngineIndex, IsXeHpcCore) {
+    DebugManagerStateRestore restore;
+    const uint32_t newIndex = 2;
+    DebugManager.flags.ForceBcsEngineIndex.set(newIndex);
+    ze_result_t returnValue;
+    auto hwInfo = *NEO::defaultHwInfo;
+    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
+    hwInfo.capabilityTable.blitterOperationsSupported = true;
+
+    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo);
+
+    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));
+
+    auto &engineGroups = testNeoDevice->getRegularEngineGroups();
+
+    uint32_t expectedCopyOrdinal = 0;
+    for (uint32_t i = 0; i < engineGroups.size(); i++) {
+        if (engineGroups[i].engineGroupType == EngineGroupType::LinkedCopy) {
+            expectedCopyOrdinal = i;
+            break;
+        }
+    }
+
+    bool queueCreated = false;
+    bool hasMultiInstancedEngine = false;
+    for (uint32_t ordinal = 0; ordinal < engineGroups.size(); ordinal++) {
+        for (uint32_t index = 0; index < engineGroups[ordinal].engines.size(); index++) {
+            bool copyOrdinal = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[ordinal].engineGroupType);
+            if (engineGroups[ordinal].engines.size() > 1 && copyOrdinal) {
+                hasMultiInstancedEngine = true;
+            }
+
+            ze_command_queue_handle_t commandQueue = {};
+
+            ze_command_queue_desc_t desc = {};
+            desc.ordinal = ordinal;
+            desc.index = index;
+            ze_result_t res = context->createCommandQueue(testL0Device.get(), &desc, &commandQueue);
+
+            EXPECT_EQ(ZE_RESULT_SUCCESS, res);
+            EXPECT_NE(nullptr, commandQueue);
+
+            auto queue = whiteboxCast(L0::CommandQueue::fromHandle(commandQueue));
+
+            if (copyOrdinal) {
+                EXPECT_EQ(engineGroups[expectedCopyOrdinal].engines[newIndex - 1].commandStreamReceiver, queue->csr);
+                queueCreated = true;
+            } else {
+                EXPECT_EQ(engineGroups[ordinal].engines[index].commandStreamReceiver, queue->csr);
+            }
+
+            queue->destroy();
+        }
+    }
+
+    EXPECT_EQ(hasMultiInstancedEngine, queueCreated);
+}
+
+HWTEST2_F(CommandQueueCommandsXeHpc, givenDebugFlagWithInvalidIndexSetWhenCreatingCommandQueueThenReturnError, IsXeHpcCore) {
+    DebugManagerStateRestore restore;
+    const uint32_t newIndex = 999;
+    DebugManager.flags.ForceBcsEngineIndex.set(newIndex);
+    ze_result_t returnValue;
+    auto hwInfo = *NEO::defaultHwInfo;
+    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
+    hwInfo.capabilityTable.blitterOperationsSupported = true;
+
+    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo);
+
+    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));
+
+    auto &engineGroups = testNeoDevice->getRegularEngineGroups();
+
+    uint32_t expectedCopyOrdinal = 0;
+    for (uint32_t i = 0; i < engineGroups.size(); i++) {
+        if (engineGroups[i].engineGroupType == EngineGroupType::LinkedCopy) {
+            expectedCopyOrdinal = i;
+            break;
+        }
+    }
+
+    ze_command_queue_handle_t commandQueue = {};
+
+    ze_command_queue_desc_t desc = {};
+    desc.ordinal = expectedCopyOrdinal;
+    desc.index = 0;
+    ze_result_t res = context->createCommandQueue(testL0Device.get(), &desc, &commandQueue);
+
+    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
+    EXPECT_EQ(nullptr, commandQueue);
+}
+
+HWTEST2_F(CommandQueueCommandsXeHpc, givenDebugFlagWithNonExistingIndexSetWhenCreatingCommandQueueThenReturnError, IsXeHpcCore) {
+    DebugManagerStateRestore restore;
+    const uint32_t newIndex = 1;
+    DebugManager.flags.ForceBcsEngineIndex.set(newIndex);
+    ze_result_t returnValue;
+    auto hwInfo = *NEO::defaultHwInfo;
+    hwInfo.featureTable.ftrBcsInfo = 1;
+    hwInfo.capabilityTable.blitterOperationsSupported = true;
+
+    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo);
+
+    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));
+
+    auto &engineGroups = testNeoDevice->getRegularEngineGroups();
+
+    uint32_t expectedCopyOrdinal = 0;
+    for (uint32_t i = 0; i < engineGroups.size(); i++) {
+        if (engineGroups[i].engineGroupType == EngineGroupType::Copy) {
+            expectedCopyOrdinal = i;
+            break;
+        }
+    }
+
+    ze_command_queue_handle_t commandQueue = {};
+
+    ze_command_queue_desc_t desc = {};
+    desc.ordinal = expectedCopyOrdinal;
+    desc.index = 0;
+    ze_result_t res = context->createCommandQueue(testL0Device.get(), &desc, &commandQueue);
+
+    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
+    EXPECT_EQ(nullptr, commandQueue);
+}
+
+HWTEST2_F(CommandQueueCommandsXeHpc, givenDebugFlagWithMainEngineSetWhenCreatingCommandQueueThenOverrideEngineIndex, IsXeHpcCore) {
+    DebugManagerStateRestore restore;
+    const uint32_t newIndex = 0;
+    DebugManager.flags.ForceBcsEngineIndex.set(newIndex);
+    ze_result_t returnValue;
+    auto hwInfo = *NEO::defaultHwInfo;
+    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
+    hwInfo.capabilityTable.blitterOperationsSupported = true;
+
+    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo);
+
+    auto testL0Device = std::unique_ptr<L0::Device>(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));
+
+    auto &engineGroups = testNeoDevice->getRegularEngineGroups();
+
+    uint32_t expectedCopyOrdinal = 0;
+    for (uint32_t i = 0; i < engineGroups.size(); i++) {
+        if (engineGroups[i].engineGroupType == EngineGroupType::Copy) {
+            expectedCopyOrdinal = i;
+            break;
+        }
+    }
+
+    bool queueCreated = false;
+    bool hasMultiInstancedEngine = false;
+    for (uint32_t ordinal = 0; ordinal < engineGroups.size(); ordinal++) {
+        for (uint32_t index = 0; index < engineGroups[ordinal].engines.size(); index++) {
+            bool copyOrdinal = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[ordinal].engineGroupType);
+            if (engineGroups[ordinal].engines.size() > 1 && copyOrdinal) {
+                hasMultiInstancedEngine = true;
+            }
+
+            ze_command_queue_handle_t commandQueue = {};
+
+            ze_command_queue_desc_t desc = {};
+            desc.ordinal = ordinal;
+            desc.index = index;
+            ze_result_t res = context->createCommandQueue(testL0Device.get(), &desc, &commandQueue);
+
+            EXPECT_EQ(ZE_RESULT_SUCCESS, res);
+            EXPECT_NE(nullptr, commandQueue);
+
+            auto queue = whiteboxCast(L0::CommandQueue::fromHandle(commandQueue));
+
+            if (copyOrdinal) {
+                EXPECT_EQ(engineGroups[expectedCopyOrdinal].engines[newIndex].commandStreamReceiver, queue->csr);
+                queueCreated = true;
+            } else {
+                EXPECT_EQ(engineGroups[ordinal].engines[index].commandStreamReceiver, queue->csr);
+            }
+
+            queue->destroy();
+        }
+    }
+
+    EXPECT_EQ(hasMultiInstancedEngine, queueCreated);
+}
+
 HWTEST2_F(CommandQueueCommandsXeHpc, givenLinkedCopyEngineOrdinalWhenCreatingThenSetAsCopyOnly, IsXeHpcCore) {
     ze_result_t returnValue;
     auto hwInfo = *NEO::defaultHwInfo;
diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp
index da282adae0..6950b800d3 100644
--- a/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp
+++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp
@@ -17,6 +17,7 @@
 #include "level_zero/core/source/cmdqueue/cmdqueue.h"
 #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
 #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
+#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
 #include "level_zero/core/test/unit_tests/mocks/mock_device.h"
 #include "level_zero/include/ze_intel_gpu.h"
 
@@ -216,6 +217,104 @@ HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
     EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
 }
 
+HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, givenDebugFlagWithLinkedEngineSetWhenCreatingCommandQueueThenOverrideEngineIndex, IsXeHpcCore) {
+    DebugManagerStateRestore restore;
+    const uint32_t newIndex = 2;
+    DebugManager.flags.ForceBcsEngineIndex.set(newIndex);
+
+    auto &engineGroups = static_cast<MockDeviceImp *>(deviceImp)->subDeviceCopyEngineGroups;
+
+    uint32_t expectedCopyOrdinal = 0;
+    for (uint32_t i = 0; i < engineGroups.size(); i++) {
+        if (engineGroups[i].engineGroupType == EngineGroupType::LinkedCopy) {
+            expectedCopyOrdinal = i;
+            break;
+        }
+    }
+
+    for (uint32_t ordinal = 0; ordinal < engineGroups.size(); ordinal++) {
+        for (uint32_t index = 0; index < engineGroups[ordinal].engines.size(); index++) {
+            ze_command_queue_handle_t commandQueue = {};
+
+            ze_command_queue_desc_t desc = {};
+            desc.ordinal = ordinal + 1;
+            desc.index = index;
+            ze_result_t res = context->createCommandQueue(deviceImp, &desc, &commandQueue);
+
+            EXPECT_EQ(ZE_RESULT_SUCCESS, res);
+            EXPECT_NE(nullptr, commandQueue);
+
+            auto queue = whiteboxCast(L0::CommandQueue::fromHandle(commandQueue));
+
+            EXPECT_EQ(engineGroups[expectedCopyOrdinal].engines[newIndex - 1].commandStreamReceiver, queue->csr);
+
+            queue->destroy();
+        }
+    }
+}
+
+HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, givenDebugFlagWithInvalidIndexSetWhenCreatingCommandQueueThenReturnError, IsXeHpcCore) {
+    DebugManagerStateRestore restore;
+    const uint32_t newIndex = 999;
+    DebugManager.flags.ForceBcsEngineIndex.set(newIndex);
+
+    auto &engineGroups = static_cast<MockDeviceImp *>(deviceImp)->subDeviceCopyEngineGroups;
+
+    uint32_t expectedCopyOrdinal = 0;
+    for (uint32_t i = 0; i < engineGroups.size(); i++) {
+        if (engineGroups[i].engineGroupType == EngineGroupType::LinkedCopy) {
+            expectedCopyOrdinal = i;
+            break;
+        }
+    }
+
+    ze_command_queue_handle_t commandQueue = {};
+
+    ze_command_queue_desc_t desc = {};
+    desc.ordinal = expectedCopyOrdinal + 1;
+    desc.index = 0;
+    ze_result_t res = context->createCommandQueue(deviceImp, &desc, &commandQueue);
+
+    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
+    EXPECT_EQ(nullptr, commandQueue);
+}
+
+HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, givenDebugFlagWithMainEngineSetWhenCreatingCommandQueueThenOverrideEngineIndex, IsXeHpcCore) {
+    DebugManagerStateRestore restore;
+    const uint32_t newIndex = 0;
+    DebugManager.flags.ForceBcsEngineIndex.set(newIndex);
+
+    auto &engineGroups = static_cast<MockDeviceImp *>(deviceImp)->subDeviceCopyEngineGroups;
+
+    uint32_t expectedCopyOrdinal = 0;
+    for (uint32_t i = 0; i < engineGroups.size(); i++) {
+        if (engineGroups[i].engineGroupType == EngineGroupType::Copy) {
+            expectedCopyOrdinal = i;
+            break;
+        }
+    }
+
+    for (uint32_t ordinal = 0; ordinal < engineGroups.size(); ordinal++) {
+        for (uint32_t index = 0; index < engineGroups[ordinal].engines.size(); index++) {
+            ze_command_queue_handle_t commandQueue = {};
+
+            ze_command_queue_desc_t desc = {};
+            desc.ordinal = ordinal + 1;
+            desc.index = index;
+            ze_result_t res = context->createCommandQueue(deviceImp, &desc, &commandQueue);
+
+            EXPECT_EQ(ZE_RESULT_SUCCESS, res);
+            EXPECT_NE(nullptr, commandQueue);
+
+            auto queue = whiteboxCast(L0::CommandQueue::fromHandle(commandQueue));
+
+            EXPECT_EQ(engineGroups[expectedCopyOrdinal].engines[newIndex].commandStreamReceiver, queue->csr);
+
+            queue->destroy();
+        }
+    }
+}
+
 HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
           givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingThenImmediateCommandListCreatedWithCorrectDevice, IsXeHpcCore) {
     uint32_t count = 0;