diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 52e528bb8e..f2548a8843 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -152,12 +152,22 @@ ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t * ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList) { auto &engineGroups = getActiveDevice()->getRegularEngineGroups(); - if (desc->commandQueueGroupOrdinal >= engineGroups.size()) { + uint32_t numEngineGroups = static_cast(engineGroups.size()); + auto &subDeviceEngineGroups = this->getSubDeviceCopyEngineGroups(); + + if (!this->isQueueGroupOrdinalValid(desc->commandQueueGroupOrdinal)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } + + NEO::EngineGroupType engineGroupType{}; + if (desc->commandQueueGroupOrdinal < numEngineGroups) { + engineGroupType = engineGroups[desc->commandQueueGroupOrdinal].engineGroupType; + } else { + engineGroupType = subDeviceEngineGroups[desc->commandQueueGroupOrdinal - numEngineGroups].engineGroupType; + } + auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; ze_result_t returnValue = ZE_RESULT_SUCCESS; - auto engineGroupType = engineGroups[desc->commandQueueGroupOrdinal].engineGroupType; *commandList = CommandList::create(productFamily, this, engineGroupType, desc->flags, returnValue); return returnValue; @@ -166,11 +176,21 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc, ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc, ze_command_list_handle_t *phCommandList) { auto &engineGroups = getActiveDevice()->getRegularEngineGroups(); - if (desc->ordinal >= engineGroups.size()) { + uint32_t numEngineGroups = static_cast(engineGroups.size()); + auto &subDeviceEngineGroups = this->getSubDeviceCopyEngineGroups(); + + if 
(!this->isQueueGroupOrdinalValid(desc->ordinal)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } + + NEO::EngineGroupType engineGroupType{}; + if (desc->ordinal < numEngineGroups) { + engineGroupType = engineGroups[desc->ordinal].engineGroupType; + } else { + engineGroupType = subDeviceEngineGroups[desc->ordinal - numEngineGroups].engineGroupType; + } + auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily; - auto engineGroupType = engineGroups[desc->ordinal].engineGroupType; ze_result_t returnValue = ZE_RESULT_SUCCESS; *phCommandList = CommandList::createImmediate(productFamily, this, desc, false, engineGroupType, returnValue); @@ -183,11 +203,19 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc, NEO::CommandStreamReceiver *csr = nullptr; auto &engineGroups = getActiveDevice()->getRegularEngineGroups(); - if (desc->ordinal >= engineGroups.size()) { + uint32_t numEngineGroups = static_cast(engineGroups.size()); + auto &subDeviceEngineGroups = this->getSubDeviceCopyEngineGroups(); + + if (!this->isQueueGroupOrdinalValid(desc->ordinal)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - bool isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType); + bool isCopyOnly = false; + if (desc->ordinal < numEngineGroups) { + isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType); + } else { + isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(subDeviceEngineGroups[desc->ordinal - numEngineGroups].engineGroupType); + } if (desc->priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW && !isCopyOnly) { getCsrForLowPriority(&csr); @@ -206,14 +234,65 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc, return returnValue; } +void DeviceImp::populateSubDeviceCopyEngineGroups() { + NEO::Device *activeDevice = this->getActiveDevice(); + if (this->isImplicitScalingCapable() == false || activeDevice->getNumSubDevices() == 0) { + 
return; + } + + NEO::Device *activeSubDevice = activeDevice->getSubDevice(0u); + auto &subDeviceEngineGroups = activeSubDevice->getRegularEngineGroups(); + uint32_t numSubDeviceEngineGroups = static_cast(subDeviceEngineGroups.size()); + + for (uint32_t subDeviceQueueGroupsIter = 0; subDeviceQueueGroupsIter < numSubDeviceEngineGroups; subDeviceQueueGroupsIter++) { + if (subDeviceEngineGroups[subDeviceQueueGroupsIter].engineGroupType == NEO::EngineGroupType::Copy || + subDeviceEngineGroups[subDeviceQueueGroupsIter].engineGroupType == NEO::EngineGroupType::LinkedCopy) { + subDeviceCopyEngineGroups.push_back(subDeviceEngineGroups[subDeviceQueueGroupsIter]); + } + } +} + +NEO::Device::EngineGroupsT &DeviceImp::getSubDeviceCopyEngineGroups() { + return this->subDeviceCopyEngineGroups; +} + +uint32_t DeviceImp::getCopyQueueGroupsFromSubDevice(uint32_t numberOfSubDeviceCopyEngineGroupsRequested, + ze_command_queue_group_properties_t *pCommandQueueGroupProperties) { + auto &subDeviceCopyEngineGroups = this->getSubDeviceCopyEngineGroups(); + uint32_t numSubDeviceCopyEngineGroups = static_cast(subDeviceCopyEngineGroups.size()); + + if (pCommandQueueGroupProperties == nullptr) { + return numSubDeviceCopyEngineGroups; + } + + const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); + auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + auto &l0HwHelper = L0HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + + uint32_t subDeviceQueueGroupsIter = 0; + for (; subDeviceQueueGroupsIter < std::min(numSubDeviceCopyEngineGroups, numberOfSubDeviceCopyEngineGroupsRequested); subDeviceQueueGroupsIter++) { + pCommandQueueGroupProperties[subDeviceQueueGroupsIter].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY; + pCommandQueueGroupProperties[subDeviceQueueGroupsIter].maxMemoryFillPatternSize = hwHelper.getMaxFillPaternSizeForCopyEngine(); + + l0HwHelper.setAdditionalGroupProperty(pCommandQueueGroupProperties[subDeviceQueueGroupsIter], 
subDeviceCopyEngineGroups[subDeviceQueueGroupsIter].engineGroupType); + pCommandQueueGroupProperties[subDeviceQueueGroupsIter].numQueues = static_cast(subDeviceCopyEngineGroups[subDeviceQueueGroupsIter].engines.size()); + } + + return subDeviceQueueGroupsIter; +} + ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties) { NEO::Device *activeDevice = getActiveDevice(); auto &engineGroups = activeDevice->getRegularEngineGroups(); uint32_t numEngineGroups = static_cast(engineGroups.size()); + uint32_t numSubDeviceCopyEngineGroups = getCopyQueueGroupsFromSubDevice(std::numeric_limits::max(), nullptr); + + uint32_t totalEngineGroups = numEngineGroups + numSubDeviceCopyEngineGroups; + if (*pCount == 0) { - *pCount = numEngineGroups; + *pCount = totalEngineGroups; return ZE_RESULT_SUCCESS; } @@ -221,8 +300,8 @@ ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount, auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto &l0HwHelper = L0HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); - *pCount = std::min(numEngineGroups, *pCount); - for (uint32_t i = 0; i < *pCount; i++) { + *pCount = std::min(totalEngineGroups, *pCount); + for (uint32_t i = 0; i < std::min(numEngineGroups, *pCount); i++) { if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) { pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE | ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY | @@ -244,6 +323,11 @@ ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount, pCommandQueueGroupProperties[i].numQueues = static_cast(engineGroups[i].engines.size()); } + if (*pCount > numEngineGroups) { + uint32_t remainingEngineGroups = *pCount - numEngineGroups; + getCopyQueueGroupsFromSubDevice(remainingEngineGroups, &pCommandQueueGroupProperties[numEngineGroups]); + } + return ZE_RESULT_SUCCESS; } @@ -930,6 +1014,9 @@ Device 
*Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool } device->createSysmanHandle(isSubDevice); device->resourcesReleased = false; + + device->populateSubDeviceCopyEngineGroups(); + return device; } @@ -1087,13 +1174,41 @@ void DeviceImp::storeReusableAllocation(NEO::GraphicsAllocation &alloc) { allocationsForReuse->pushFrontOne(alloc); } +bool DeviceImp::isQueueGroupOrdinalValid(uint32_t ordinal) { + auto &engineGroups = getActiveDevice()->getRegularEngineGroups(); + uint32_t numEngineGroups = static_cast(engineGroups.size()); + auto &subDeviceCopyEngineGroups = this->getSubDeviceCopyEngineGroups(); + uint32_t numSubDeviceCopyEngineGroups = static_cast(subDeviceCopyEngineGroups.size()); + + uint32_t totalEngineGroups = numEngineGroups + numSubDeviceCopyEngineGroups; + if (ordinal >= totalEngineGroups) { + return false; + } + + return true; +} + ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) { auto &engineGroups = getActiveDevice()->getRegularEngineGroups(); - if ((ordinal >= engineGroups.size()) || - (index >= engineGroups[ordinal].engines.size())) { + uint32_t numEngineGroups = static_cast(engineGroups.size()); + auto &subDeviceCopyEngineGroups = this->getSubDeviceCopyEngineGroups(); + + if (!this->isQueueGroupOrdinalValid(ordinal)) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - *csr = engineGroups[ordinal].engines[index].commandStreamReceiver; + + if (ordinal < numEngineGroups) { + if (index >= engineGroups[ordinal].engines.size()) { + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + *csr = engineGroups[ordinal].engines[index].commandStreamReceiver; + } else { + if (index >= subDeviceCopyEngineGroups[ordinal - numEngineGroups].engines.size()) { + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + *csr = subDeviceCopyEngineGroups[ordinal - numEngineGroups].engines[index].commandStreamReceiver; + } + return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/device/device_imp.h 
b/level_zero/core/source/device/device_imp.h index f44bd187cd..b604955233 100644 --- a/level_zero/core/source/device/device_imp.h +++ b/level_zero/core/source/device/device_imp.h @@ -55,6 +55,8 @@ struct DeviceImp : public Device { ze_result_t setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) override; ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) override; ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) override; + uint32_t getCopyQueueGroupsFromSubDevice(uint32_t numberOfSubDeviceCopyEngineGroupsRequested, + ze_command_queue_group_properties_t *pCommandQueueGroupProperties); ze_result_t getCommandQueueGroupProperties(uint32_t *pCount, ze_command_queue_group_properties_t *pCommandQueueGroupProperties) override; ze_result_t getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) override; @@ -124,8 +126,13 @@ struct DeviceImp : public Device { std::unique_ptr allocationsForReuse; std::unique_ptr driverInfo; void createSysmanHandle(bool isSubDevice); + NEO::Device::EngineGroupsT &getSubDeviceCopyEngineGroups(); + void populateSubDeviceCopyEngineGroups(); + bool isQueueGroupOrdinalValid(uint32_t ordinal); protected: + NEO::Device::EngineGroupsT subDeviceCopyEngineGroups{}; + NEO::GraphicsAllocation *debugSurface = nullptr; SysmanDevice *pSysmanDevice = nullptr; std::unique_ptr debugSession = nullptr; diff --git a/level_zero/core/test/unit_tests/fixtures/device_fixture.h b/level_zero/core/test/unit_tests/fixtures/device_fixture.h index a5943e5fa2..34a3efdd0a 100644 --- a/level_zero/core/test/unit_tests/fixtures/device_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/device_fixture.h @@ -200,5 +200,93 @@ struct MultipleDevicesWithCustomHwInfo { const uint32_t numSubDevices = 2u; }; +template +struct SingleRootMultiSubDeviceFixtureWithImplicitScaling : public MultiDeviceFixture { + 
NEO::MockCompilerEnableGuard compilerMock = NEO::MockCompilerEnableGuard(true); + + DebugManagerStateRestore restorer; + std::unique_ptr> driverHandle; + std::vector devices; + uint32_t numRootDevices = 1u; + uint32_t numSubDevices = 2u; + L0::ContextImp *context = nullptr; + + L0::Device *device = nullptr; + NEO::Device *neoDevice = nullptr; + L0::DeviceImp *deviceImp = nullptr; + + NEO::HardwareInfo hwInfo; + uint32_t expectedCopyEngineCount = copyEngineCount; + uint32_t expectedComputeEngineCount = 0; + + uint32_t numEngineGroups = 0; + uint32_t subDeviceNumEngineGroups = 0; + + void SetUp() { // NOLINT(readability-identifier-naming) + DebugManagerStateRestore restorer; + DebugManager.flags.EnableImplicitScaling.set(implicitScaling); + DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); + DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices); + + NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); + hwInfo.featureTable.flags.ftrRcsNode = false; + hwInfo.featureTable.flags.ftrCCSNode = true; + // hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; + if (expectedCopyEngineCount != 0) { + hwInfo.capabilityTable.blitterOperationsSupported = true; + hwInfo.featureTable.ftrBcsInfo = maxNBitValue(expectedCopyEngineCount); + } else { + hwInfo.capabilityTable.blitterOperationsSupported = false; + } + + if (implicitScaling) { + expectedComputeEngineCount = 1u; + } else { + expectedComputeEngineCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; + } + + MockDevice *mockDevice = MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0); + + NEO::DeviceVector devices; + devices.push_back(std::unique_ptr(mockDevice)); + + driverHandle = std::make_unique>(); + ze_result_t res = driverHandle->initialize(std::move(devices)); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + ze_context_handle_t hContext; + ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0}; + res = driverHandle->createContext(&desc, 0u, nullptr, &hContext); + 
EXPECT_EQ(ZE_RESULT_SUCCESS, res); + context = static_cast(Context::fromHandle(hContext)); + + device = driverHandle->devices[0]; + neoDevice = device->getNEODevice(); + deviceImp = static_cast(device); + + NEO::Device *activeDevice = deviceImp->getActiveDevice(); + auto &engineGroups = activeDevice->getRegularEngineGroups(); + numEngineGroups = static_cast(engineGroups.size()); + + if (activeDevice->getSubDevices().size() > 0) { + NEO::Device *activeSubDevice = activeDevice->getSubDevice(0u); + (void)activeSubDevice; + auto &subDeviceEngineGroups = activeSubDevice->getRegularEngineGroups(); + (void)subDeviceEngineGroups; + + for (uint32_t i = 0; i < subDeviceEngineGroups.size(); i++) { + if (subDeviceEngineGroups[i].engineGroupType == NEO::EngineGroupType::Copy || + subDeviceEngineGroups[i].engineGroupType == NEO::EngineGroupType::LinkedCopy) { + subDeviceNumEngineGroups += 1; + } + } + } + } + + void TearDown() { // NOLINT(readability-identifier-naming) + context->destroy(); + } +}; + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp index 57c2c8da81..9cd2aa364c 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_device_xe_hpc_core.cpp @@ -11,6 +11,7 @@ #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" +#include "level_zero/core/source/cmdlist/cmdlist_imp.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" @@ -124,6 +125,238 @@ HWTEST2_F(DeviceTestXeHpc, givenXeHpcBStepWhenCreatingMultiTileDeviceThenExpectI delete device; } +using MultiDeviceCommandQueueGroupWithNineCopyEnginesTest = Test>; + +HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, 
givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingThenExpectedQueueGroupsAreReturned, IsXeHpcCore) { + uint32_t count = 0; + ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups); + + std::vector properties(count); + res = deviceImp->getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + uint32_t numCopyQueues = 0; + for (uint32_t i = 0; i < count; i++) { + if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + EXPECT_EQ(properties[i].numQueues, 1u); + } else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) && + !(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) { + numCopyQueues += properties[i].numQueues; + } + } + EXPECT_EQ(numCopyQueues, expectedCopyEngineCount); +} + +HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, + givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingThenCommandListCreatedWithCorrectDevice, IsXeHpcCore) { + uint32_t count = 0; + ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups); + + std::vector properties(count); + res = deviceImp->getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + uint32_t numCopyQueues = 0; + for (uint32_t i = 0; i < count; i++) { + if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + EXPECT_EQ(properties[i].numQueues, 1u); + } else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) && + !(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) { + numCopyQueues += properties[i].numQueues; + } + } + EXPECT_EQ(numCopyQueues, expectedCopyEngineCount); + + ze_command_list_handle_t hComputeCommandList{}; + ze_command_list_desc_t computeDesc{}; + 
computeDesc.commandQueueGroupOrdinal = numEngineGroups - 1; + res = deviceImp->createCommandList(&computeDesc, &hComputeCommandList); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + CommandListImp *computeCommandList = static_cast(CommandList::fromHandle(hComputeCommandList)); + EXPECT_FALSE(computeCommandList->isCopyOnly()); + + ze_command_queue_handle_t hCommandQueue{}; + ze_command_queue_desc_t computeCommandQueueDesc{}; + computeCommandQueueDesc.ordinal = computeDesc.commandQueueGroupOrdinal; + res = device->createCommandQueue(&computeCommandQueueDesc, &hCommandQueue); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + CommandQueue *computeCommandQueue = static_cast(CommandQueue::fromHandle(hCommandQueue)); + EXPECT_FALSE(computeCommandQueue->peekIsCopyOnlyCommandQueue()); + + ze_command_list_handle_t hCopyCommandList{}; + ze_command_list_desc_t copyDesc{}; + copyDesc.commandQueueGroupOrdinal = numEngineGroups + 1; + res = deviceImp->createCommandList(©Desc, &hCopyCommandList); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + CommandListImp *copyCommandList = static_cast(CommandList::fromHandle(hCopyCommandList)); + EXPECT_TRUE(copyCommandList->isCopyOnly()); + + computeCommandQueue->destroy(); + computeCommandList->destroy(); + copyCommandList->destroy(); +} + +HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, + givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingWhenPassingIncorrectIndexThenInvalidArgumentIsReturned, IsXeHpcCore) { + uint32_t count = 0; + ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups); + + std::vector properties(count); + res = deviceImp->getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + uint32_t numCopyQueues = 0; + for (uint32_t i = 0; i < count; i++) { + if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + EXPECT_EQ(properties[i].numQueues, 1u); 
+ } else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) && + !(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) { + numCopyQueues += properties[i].numQueues; + } + } + EXPECT_EQ(numCopyQueues, expectedCopyEngineCount); + + ze_command_queue_handle_t hCommandQueue{}; + ze_command_queue_desc_t computeCommandQueueDesc{}; + computeCommandQueueDesc.ordinal = numEngineGroups + 1; + computeCommandQueueDesc.index = numCopyQueues + 2; + res = device->createCommandQueue(&computeCommandQueueDesc, &hCommandQueue); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res); +} + +HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, + givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingThenImmediateCommandListCreatedWithCorrectDevice, IsXeHpcCore) { + uint32_t count = 0; + ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups); + + std::vector properties(count); + res = deviceImp->getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + uint32_t numCopyQueues = 0; + for (uint32_t i = 0; i < count; i++) { + if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + EXPECT_EQ(properties[i].numQueues, 1u); + } else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) && + !(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) { + numCopyQueues += properties[i].numQueues; + } + } + EXPECT_EQ(numCopyQueues, expectedCopyEngineCount); + + ze_command_list_handle_t hComputeCommandList{}; + ze_command_queue_desc_t computeDesc{}; + computeDesc.ordinal = numEngineGroups - 1; + res = deviceImp->createCommandListImmediate(&computeDesc, &hComputeCommandList); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + CommandListImp *computeCommandList = static_cast(CommandList::fromHandle(hComputeCommandList)); + 
EXPECT_FALSE(computeCommandList->isCopyOnly()); + + ze_command_list_handle_t hCopyCommandList{}; + ze_command_queue_desc_t copyDesc{}; + copyDesc.ordinal = numEngineGroups + 1; + res = deviceImp->createCommandListImmediate(©Desc, &hCopyCommandList); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + CommandListImp *copyCommandList = static_cast(CommandList::fromHandle(hCopyCommandList)); + EXPECT_TRUE(copyCommandList->isCopyOnly()); + + computeCommandList->destroy(); + copyCommandList->destroy(); +} + +HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingWhenRequestingFewerGroupsThenExpectedGroupsAreReturned, IsXeHpcCore) { + uint32_t count = 0; + ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups); + + count--; + std::vector properties(count); + deviceImp->getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + uint32_t numCopyQueues = 0; + for (uint32_t i = 0; i < count; i++) { + if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + EXPECT_EQ(properties[i].numQueues, 1u); + } else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) && + !(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) { + numCopyQueues += properties[i].numQueues; + } + } + EXPECT_LE(numCopyQueues, expectedCopyEngineCount); +} + +HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingWhenRequestingOnlyOneGroupThenOneQueueGroupIsReturned, IsXeHpcCore) { + uint32_t count = 0; + ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups); + + count = 1; + std::vector properties(count); + res = 
deviceImp->getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(count, 1u); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + for (uint32_t i = 0; i < count; i++) { + if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + EXPECT_EQ(properties[i].numQueues, expectedComputeEngineCount); + } + } +} + +using MultiDeviceCommandQueueGroupWithNoCopyEnginesTest = Test>; +HWTEST2_F(MultiDeviceCommandQueueGroupWithNoCopyEnginesTest, + givenNoCopyEngineSupportAndCCSAndImplicitScalingThenExpectedQueueGroupsAreReturned, IsXeHpcCore) { + uint32_t count = 0; + ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(count, numEngineGroups); + + std::vector properties(count); + res = deviceImp->getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + for (uint32_t i = 0; i < count; i++) { + if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + EXPECT_EQ(properties[i].numQueues, expectedComputeEngineCount); + } + } +} + +using MultiDeviceCommandQueueGroupWithNoCopyEnginesAndNoImplicitScalingTest = Test>; +HWTEST2_F(MultiDeviceCommandQueueGroupWithNoCopyEnginesAndNoImplicitScalingTest, + givenNoCopyEngineSupportAndCCSAndNoImplicitScalingThenOnlyTheQueueGroupsFromSubDeviceAreReturned, IsXeHpcCore) { + uint32_t count = 0; + ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + EXPECT_EQ(count, numEngineGroups); + + std::vector properties(count); + res = deviceImp->getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + for (uint32_t i = 0; i < count; i++) { + if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + EXPECT_EQ(properties[i].numQueues, expectedComputeEngineCount); + } + } +} + using CommandQueueGroupTest = Test; HWTEST2_F(CommandQueueGroupTest, 
givenNoBlitterSupportAndNoCCSThenOneQueueGroupIsReturned, IsXeHpcCore) { diff --git a/programmers-guide/IMPLICIT_SCALING.md b/programmers-guide/IMPLICIT_SCALING.md index bba2b10de0..b5d06ad7ef 100644 --- a/programmers-guide/IMPLICIT_SCALING.md +++ b/programmers-guide/IMPLICIT_SCALING.md @@ -36,7 +36,12 @@ When doing allocations in implicit scaling mode, driver *colors* an allocation a When scheduling a kernel for execution, driver distributes the kernel workgroups among the available tiles. Default mechanism is called *Static Partitioning*, where the workgroups are evenly distributed among tiles. For instance, in a 2-tile system, half of the workgroups go to tile 0, and the other half to tile 1. -The number of CCSs, or compute engines, currently available with implicit scaing on the root device is one. This is because with implicit scaling the driver automatically uses all the EUs available in the device, so no other CCSs are exposed. Even though only one CCS is exposed, multiple kernels submitted to the root device using implicit scaling may execute concurrently on PVC, depending on EU availability. On XeHP_SDV, they may be serialized. See [Limitations](#Limitations) section below. +The number of CCSs, or compute engines, currently available with implicit scaling on the root device is one. This is because with implicit scaling the driver automatically uses all the EUs available in the device, so no other CCSs are exposed. Even though only one CCS is exposed, multiple kernels submitted to the root device using implicit scaling may execute concurrently on PVC, depending on EU availability. On XeHP_SDV, they may be serialized. See [Limitations](#Limitations) section below. + +No implicit scaling support is available for BCSs. Considering that, two models are followed in terms of discovery of copy engines: + +* In Level Zero, the copy engines from sub-device 0 are exposed also in the root device. 
This is to align the engine model on both the implicit and the non-implicit-scaling scenarios. +* In OpenCL, copy engines are not exposed in the root device. Since implicit scaling is only done for EUs, which are associated only with kernels submitted to CCS, BCSs are currently not being exposed and access to them is done through sub-device handles.