Expose copy engines in parent device with implicit scaling

When using implicit scaling, expose the copy engines from
sub-device 0 in the root device. This is to facilitate the
programming models of the layers above.

Related-To: NEO-6815

Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
Jaime Arteaga 2022-03-24 07:44:40 +00:00 committed by Compute-Runtime-Automation
parent 7a324051ef
commit 3c3dab8fe0
5 changed files with 461 additions and 13 deletions

View File

@ -152,12 +152,22 @@ ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *
ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
ze_command_list_handle_t *commandList) {
auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
if (desc->commandQueueGroupOrdinal >= engineGroups.size()) {
uint32_t numEngineGroups = static_cast<uint32_t>(engineGroups.size());
auto &subDeviceEngineGroups = this->getSubDeviceCopyEngineGroups();
if (!this->isQueueGroupOrdinalValid(desc->commandQueueGroupOrdinal)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
NEO::EngineGroupType engineGroupType{};
if (desc->commandQueueGroupOrdinal < numEngineGroups) {
engineGroupType = engineGroups[desc->commandQueueGroupOrdinal].engineGroupType;
} else {
engineGroupType = subDeviceEngineGroups[desc->commandQueueGroupOrdinal - numEngineGroups].engineGroupType;
}
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto engineGroupType = engineGroups[desc->commandQueueGroupOrdinal].engineGroupType;
*commandList = CommandList::create(productFamily, this, engineGroupType, desc->flags, returnValue);
return returnValue;
@ -166,11 +176,21 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
ze_result_t DeviceImp::createCommandListImmediate(const ze_command_queue_desc_t *desc,
ze_command_list_handle_t *phCommandList) {
auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
if (desc->ordinal >= engineGroups.size()) {
uint32_t numEngineGroups = static_cast<uint32_t>(engineGroups.size());
auto &subDeviceEngineGroups = this->getSubDeviceCopyEngineGroups();
if (!this->isQueueGroupOrdinalValid(desc->ordinal)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
NEO::EngineGroupType engineGroupType{};
if (desc->ordinal < numEngineGroups) {
engineGroupType = engineGroups[desc->ordinal].engineGroupType;
} else {
engineGroupType = subDeviceEngineGroups[desc->ordinal - numEngineGroups].engineGroupType;
}
auto productFamily = neoDevice->getHardwareInfo().platform.eProductFamily;
auto engineGroupType = engineGroups[desc->ordinal].engineGroupType;
ze_result_t returnValue = ZE_RESULT_SUCCESS;
*phCommandList = CommandList::createImmediate(productFamily, this, desc, false, engineGroupType, returnValue);
@ -183,11 +203,19 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
NEO::CommandStreamReceiver *csr = nullptr;
auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
if (desc->ordinal >= engineGroups.size()) {
uint32_t numEngineGroups = static_cast<uint32_t>(engineGroups.size());
auto &subDeviceEngineGroups = this->getSubDeviceCopyEngineGroups();
if (!this->isQueueGroupOrdinalValid(desc->ordinal)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
bool isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType);
bool isCopyOnly = false;
if (desc->ordinal < numEngineGroups) {
isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(engineGroups[desc->ordinal].engineGroupType);
} else {
isCopyOnly = NEO::EngineHelper::isCopyOnlyEngineType(subDeviceEngineGroups[desc->ordinal - numEngineGroups].engineGroupType);
}
if (desc->priority == ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW && !isCopyOnly) {
getCsrForLowPriority(&csr);
@ -206,14 +234,65 @@ ze_result_t DeviceImp::createCommandQueue(const ze_command_queue_desc_t *desc,
return returnValue;
}
void DeviceImp::populateSubDeviceCopyEngineGroups() {
    // Only a root device running with implicit scaling re-exposes copy
    // engines; sub-devices (or devices without sub-devices) keep the cache empty.
    NEO::Device *activeDevice = this->getActiveDevice();
    if (!this->isImplicitScalingCapable() || activeDevice->getNumSubDevices() == 0) {
        return;
    }

    // Cache the copy-engine groups (main and linked) of sub-device 0 so the
    // root device can expose them as additional queue-group ordinals.
    NEO::Device *activeSubDevice = activeDevice->getSubDevice(0u);
    for (auto &engineGroup : activeSubDevice->getRegularEngineGroups()) {
        const bool isCopyGroup = (engineGroup.engineGroupType == NEO::EngineGroupType::Copy) ||
                                 (engineGroup.engineGroupType == NEO::EngineGroupType::LinkedCopy);
        if (isCopyGroup) {
            this->subDeviceCopyEngineGroups.push_back(engineGroup);
        }
    }
}
// Accessor for the copy-engine groups cached from sub-device 0 by
// populateSubDeviceCopyEngineGroups(); empty when implicit scaling is off.
NEO::Device::EngineGroupsT &DeviceImp::getSubDeviceCopyEngineGroups() {
    return subDeviceCopyEngineGroups;
}
uint32_t DeviceImp::getCopyQueueGroupsFromSubDevice(uint32_t numberOfSubDeviceCopyEngineGroupsRequested,
ze_command_queue_group_properties_t *pCommandQueueGroupProperties) {
auto &subDeviceCopyEngineGroups = this->getSubDeviceCopyEngineGroups();
uint32_t numSubDeviceCopyEngineGroups = static_cast<uint32_t>(subDeviceCopyEngineGroups.size());
if (pCommandQueueGroupProperties == nullptr) {
return numSubDeviceCopyEngineGroups;
}
const auto &hardwareInfo = this->neoDevice->getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto &l0HwHelper = L0HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
uint32_t subDeviceQueueGroupsIter = 0;
for (; subDeviceQueueGroupsIter < std::min(numSubDeviceCopyEngineGroups, numberOfSubDeviceCopyEngineGroupsRequested); subDeviceQueueGroupsIter++) {
pCommandQueueGroupProperties[subDeviceQueueGroupsIter].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY;
pCommandQueueGroupProperties[subDeviceQueueGroupsIter].maxMemoryFillPatternSize = hwHelper.getMaxFillPaternSizeForCopyEngine();
l0HwHelper.setAdditionalGroupProperty(pCommandQueueGroupProperties[subDeviceQueueGroupsIter], subDeviceCopyEngineGroups[subDeviceQueueGroupsIter].engineGroupType);
pCommandQueueGroupProperties[subDeviceQueueGroupsIter].numQueues = static_cast<uint32_t>(subDeviceCopyEngineGroups[subDeviceQueueGroupsIter].engines.size());
}
return subDeviceQueueGroupsIter;
}
ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount,
ze_command_queue_group_properties_t *pCommandQueueGroupProperties) {
NEO::Device *activeDevice = getActiveDevice();
auto &engineGroups = activeDevice->getRegularEngineGroups();
uint32_t numEngineGroups = static_cast<uint32_t>(engineGroups.size());
uint32_t numSubDeviceCopyEngineGroups = getCopyQueueGroupsFromSubDevice(std::numeric_limits<uint32_t>::max(), nullptr);
uint32_t totalEngineGroups = numEngineGroups + numSubDeviceCopyEngineGroups;
if (*pCount == 0) {
*pCount = numEngineGroups;
*pCount = totalEngineGroups;
return ZE_RESULT_SUCCESS;
}
@ -221,8 +300,8 @@ ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount,
auto &hwHelper = NEO::HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto &l0HwHelper = L0HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
*pCount = std::min(numEngineGroups, *pCount);
for (uint32_t i = 0; i < *pCount; i++) {
*pCount = std::min(totalEngineGroups, *pCount);
for (uint32_t i = 0; i < std::min(numEngineGroups, *pCount); i++) {
if (engineGroups[i].engineGroupType == NEO::EngineGroupType::RenderCompute) {
pCommandQueueGroupProperties[i].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE |
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY |
@ -244,6 +323,11 @@ ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount,
pCommandQueueGroupProperties[i].numQueues = static_cast<uint32_t>(engineGroups[i].engines.size());
}
if (*pCount > numEngineGroups) {
uint32_t remainingEngineGroups = *pCount - numEngineGroups;
getCopyQueueGroupsFromSubDevice(remainingEngineGroups, &pCommandQueueGroupProperties[numEngineGroups]);
}
return ZE_RESULT_SUCCESS;
}
@ -930,6 +1014,9 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool
}
device->createSysmanHandle(isSubDevice);
device->resourcesReleased = false;
device->populateSubDeviceCopyEngineGroups();
return device;
}
@ -1087,13 +1174,41 @@ void DeviceImp::storeReusableAllocation(NEO::GraphicsAllocation &alloc) {
allocationsForReuse->pushFrontOne(alloc);
}
// Returns true when |ordinal| addresses either a regular engine group of the
// active device or one of the copy-engine groups imported from sub-device 0
// (which are appended after the regular groups in the ordinal space).
bool DeviceImp::isQueueGroupOrdinalValid(uint32_t ordinal) {
    const uint32_t numEngineGroups = static_cast<uint32_t>(getActiveDevice()->getRegularEngineGroups().size());
    const uint32_t numSubDeviceCopyEngineGroups = static_cast<uint32_t>(this->getSubDeviceCopyEngineGroups().size());
    return ordinal < (numEngineGroups + numSubDeviceCopyEngineGroups);
}
ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr, uint32_t ordinal, uint32_t index) {
auto &engineGroups = getActiveDevice()->getRegularEngineGroups();
if ((ordinal >= engineGroups.size()) ||
(index >= engineGroups[ordinal].engines.size())) {
uint32_t numEngineGroups = static_cast<uint32_t>(engineGroups.size());
auto &subDeviceCopyEngineGroups = this->getSubDeviceCopyEngineGroups();
if (!this->isQueueGroupOrdinalValid(ordinal)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (ordinal < numEngineGroups) {
if (index >= engineGroups[ordinal].engines.size()) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
*csr = engineGroups[ordinal].engines[index].commandStreamReceiver;
} else {
if (index >= subDeviceCopyEngineGroups[ordinal - numEngineGroups].engines.size()) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
*csr = subDeviceCopyEngineGroups[ordinal - numEngineGroups].engines[index].commandStreamReceiver;
}
return ZE_RESULT_SUCCESS;
}

View File

@ -55,6 +55,8 @@ struct DeviceImp : public Device {
ze_result_t setCacheAdvice(void *ptr, size_t regionSize, ze_cache_ext_region_t cacheRegion) override;
ze_result_t imageGetProperties(const ze_image_desc_t *desc, ze_image_properties_t *pImageProperties) override;
ze_result_t getDeviceImageProperties(ze_device_image_properties_t *pDeviceImageProperties) override;
uint32_t getCopyQueueGroupsFromSubDevice(uint32_t numberOfSubDeviceCopyEngineGroupsRequested,
ze_command_queue_group_properties_t *pCommandQueueGroupProperties);
ze_result_t getCommandQueueGroupProperties(uint32_t *pCount,
ze_command_queue_group_properties_t *pCommandQueueGroupProperties) override;
ze_result_t getExternalMemoryProperties(ze_device_external_memory_properties_t *pExternalMemoryProperties) override;
@ -124,8 +126,13 @@ struct DeviceImp : public Device {
std::unique_ptr<NEO::AllocationsList> allocationsForReuse;
std::unique_ptr<NEO::DriverInfo> driverInfo;
void createSysmanHandle(bool isSubDevice);
NEO::Device::EngineGroupsT &getSubDeviceCopyEngineGroups();
void populateSubDeviceCopyEngineGroups();
bool isQueueGroupOrdinalValid(uint32_t ordinal);
protected:
NEO::Device::EngineGroupsT subDeviceCopyEngineGroups{};
NEO::GraphicsAllocation *debugSurface = nullptr;
SysmanDevice *pSysmanDevice = nullptr;
std::unique_ptr<DebugSession> debugSession = nullptr;

View File

@ -200,5 +200,93 @@ struct MultipleDevicesWithCustomHwInfo {
const uint32_t numSubDevices = 2u;
};
template <uint32_t copyEngineCount, uint32_t implicitScaling>
struct SingleRootMultiSubDeviceFixtureWithImplicitScaling : public MultiDeviceFixture {
NEO::MockCompilerEnableGuard compilerMock = NEO::MockCompilerEnableGuard(true);
DebugManagerStateRestore restorer;
std::unique_ptr<Mock<L0::DriverHandleImp>> driverHandle;
std::vector<NEO::Device *> devices;
uint32_t numRootDevices = 1u;
uint32_t numSubDevices = 2u;
L0::ContextImp *context = nullptr;
L0::Device *device = nullptr;
NEO::Device *neoDevice = nullptr;
L0::DeviceImp *deviceImp = nullptr;
NEO::HardwareInfo hwInfo;
uint32_t expectedCopyEngineCount = copyEngineCount;
uint32_t expectedComputeEngineCount = 0;
uint32_t numEngineGroups = 0;
uint32_t subDeviceNumEngineGroups = 0;
void SetUp() { // NOLINT(readability-identifier-naming)
DebugManagerStateRestore restorer;
DebugManager.flags.EnableImplicitScaling.set(implicitScaling);
DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices);
DebugManager.flags.CreateMultipleSubDevices.set(numSubDevices);
NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get();
hwInfo.featureTable.flags.ftrRcsNode = false;
hwInfo.featureTable.flags.ftrCCSNode = true;
// hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4;
if (expectedCopyEngineCount != 0) {
hwInfo.capabilityTable.blitterOperationsSupported = true;
hwInfo.featureTable.ftrBcsInfo = maxNBitValue(expectedCopyEngineCount);
} else {
hwInfo.capabilityTable.blitterOperationsSupported = false;
}
if (implicitScaling) {
expectedComputeEngineCount = 1u;
} else {
expectedComputeEngineCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
}
MockDevice *mockDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo, 0);
NEO::DeviceVector devices;
devices.push_back(std::unique_ptr<NEO::Device>(mockDevice));
driverHandle = std::make_unique<Mock<L0::DriverHandleImp>>();
ze_result_t res = driverHandle->initialize(std::move(devices));
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
ze_context_handle_t hContext;
ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
res = driverHandle->createContext(&desc, 0u, nullptr, &hContext);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
context = static_cast<ContextImp *>(Context::fromHandle(hContext));
device = driverHandle->devices[0];
neoDevice = device->getNEODevice();
deviceImp = static_cast<L0::DeviceImp *>(device);
NEO::Device *activeDevice = deviceImp->getActiveDevice();
auto &engineGroups = activeDevice->getRegularEngineGroups();
numEngineGroups = static_cast<uint32_t>(engineGroups.size());
if (activeDevice->getSubDevices().size() > 0) {
NEO::Device *activeSubDevice = activeDevice->getSubDevice(0u);
(void)activeSubDevice;
auto &subDeviceEngineGroups = activeSubDevice->getRegularEngineGroups();
(void)subDeviceEngineGroups;
for (uint32_t i = 0; i < subDeviceEngineGroups.size(); i++) {
if (subDeviceEngineGroups[i].engineGroupType == NEO::EngineGroupType::Copy ||
subDeviceEngineGroups[i].engineGroupType == NEO::EngineGroupType::LinkedCopy) {
subDeviceNumEngineGroups += 1;
}
}
}
}
void TearDown() { // NOLINT(readability-identifier-naming)
context->destroy();
}
};
} // namespace ult
} // namespace L0

View File

@ -11,6 +11,7 @@
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
@ -124,6 +125,238 @@ HWTEST2_F(DeviceTestXeHpc, givenXeHpcBStepWhenCreatingMultiTileDeviceThenExpectI
delete device;
}
using MultiDeviceCommandQueueGroupWithNineCopyEnginesTest = Test<SingleRootMultiSubDeviceFixtureWithImplicitScaling<9, 1>>;
// With 9 copy engines and implicit scaling enabled, the root device must
// report its own engine groups plus the copy-engine groups imported from
// sub-device 0, and the copy-only groups together expose all copy engines.
HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingThenExpectedQueueGroupsAreReturned, IsXeHpcCore) {
uint32_t count = 0;
// First call with nullptr output queries the total number of queue groups.
ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups);
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp->getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
uint32_t numCopyQueues = 0;
for (uint32_t i = 0; i < count; i++) {
if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
// Implicit scaling exposes a single compute queue on the root device.
EXPECT_EQ(properties[i].numQueues, 1u);
} else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) &&
!(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) {
// Copy-only groups: accumulate queue counts across main + linked groups.
numCopyQueues += properties[i].numQueues;
}
}
EXPECT_EQ(numCopyQueues, expectedCopyEngineCount);
}
// Verifies that command lists and queues can be created against both the
// regular (compute) ordinals and the copy-engine ordinals imported from
// sub-device 0, and that each reports the correct copy-only capability.
HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingThenCommandListCreatedWithCorrectDevice, IsXeHpcCore) {
uint32_t count = 0;
ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups);
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp->getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
uint32_t numCopyQueues = 0;
for (uint32_t i = 0; i < count; i++) {
if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
EXPECT_EQ(properties[i].numQueues, 1u);
} else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) &&
!(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) {
numCopyQueues += properties[i].numQueues;
}
}
EXPECT_EQ(numCopyQueues, expectedCopyEngineCount);
// Ordinal within the regular groups: must yield a non-copy-only list/queue.
ze_command_list_handle_t hComputeCommandList{};
ze_command_list_desc_t computeDesc{};
computeDesc.commandQueueGroupOrdinal = numEngineGroups - 1;
res = deviceImp->createCommandList(&computeDesc, &hComputeCommandList);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandListImp *computeCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hComputeCommandList));
EXPECT_FALSE(computeCommandList->isCopyOnly());
ze_command_queue_handle_t hCommandQueue{};
ze_command_queue_desc_t computeCommandQueueDesc{};
computeCommandQueueDesc.ordinal = computeDesc.commandQueueGroupOrdinal;
res = device->createCommandQueue(&computeCommandQueueDesc, &hCommandQueue);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandQueue *computeCommandQueue = static_cast<CommandQueue *>(CommandQueue::fromHandle(hCommandQueue));
EXPECT_FALSE(computeCommandQueue->peekIsCopyOnlyCommandQueue());
// Ordinal past the regular groups addresses an imported sub-device copy
// group: the resulting command list must be copy-only.
ze_command_list_handle_t hCopyCommandList{};
ze_command_list_desc_t copyDesc{};
copyDesc.commandQueueGroupOrdinal = numEngineGroups + 1;
res = deviceImp->createCommandList(&copyDesc, &hCopyCommandList);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandListImp *copyCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hCopyCommandList));
EXPECT_TRUE(copyCommandList->isCopyOnly());
computeCommandQueue->destroy();
computeCommandList->destroy();
copyCommandList->destroy();
}
// A queue index beyond the number of queues in an imported copy group must be
// rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT.
HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingWhenPassingIncorrectIndexThenInvalidArgumentIsReturned, IsXeHpcCore) {
uint32_t count = 0;
ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups);
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp->getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
uint32_t numCopyQueues = 0;
for (uint32_t i = 0; i < count; i++) {
if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
EXPECT_EQ(properties[i].numQueues, 1u);
} else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) &&
!(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) {
numCopyQueues += properties[i].numQueues;
}
}
EXPECT_EQ(numCopyQueues, expectedCopyEngineCount);
// NOTE(review): despite its name, this desc targets a sub-device COPY group
// (ordinal numEngineGroups + 1) with an out-of-range index — consider
// renaming the local to copyCommandQueueDesc.
ze_command_queue_handle_t hCommandQueue{};
ze_command_queue_desc_t computeCommandQueueDesc{};
computeCommandQueueDesc.ordinal = numEngineGroups + 1;
computeCommandQueueDesc.index = numCopyQueues + 2;
res = device->createCommandQueue(&computeCommandQueueDesc, &hCommandQueue);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
}
// Same coverage as the command-list test above, but through the immediate
// command-list creation path.
HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest,
givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingThenImmediateCommandListCreatedWithCorrectDevice, IsXeHpcCore) {
uint32_t count = 0;
ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups);
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp->getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
uint32_t numCopyQueues = 0;
for (uint32_t i = 0; i < count; i++) {
if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
EXPECT_EQ(properties[i].numQueues, 1u);
} else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) &&
!(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) {
numCopyQueues += properties[i].numQueues;
}
}
EXPECT_EQ(numCopyQueues, expectedCopyEngineCount);
// Regular-group ordinal: non-copy-only immediate list expected.
ze_command_list_handle_t hComputeCommandList{};
ze_command_queue_desc_t computeDesc{};
computeDesc.ordinal = numEngineGroups - 1;
res = deviceImp->createCommandListImmediate(&computeDesc, &hComputeCommandList);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandListImp *computeCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hComputeCommandList));
EXPECT_FALSE(computeCommandList->isCopyOnly());
// Imported copy-group ordinal: copy-only immediate list expected.
ze_command_list_handle_t hCopyCommandList{};
ze_command_queue_desc_t copyDesc{};
copyDesc.ordinal = numEngineGroups + 1;
res = deviceImp->createCommandListImmediate(&copyDesc, &hCopyCommandList);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
CommandListImp *copyCommandList = static_cast<CommandListImp *>(CommandList::fromHandle(hCopyCommandList));
EXPECT_TRUE(copyCommandList->isCopyOnly());
computeCommandList->destroy();
copyCommandList->destroy();
}
// Requesting one queue group fewer than available must still succeed and
// return at most the full copy-engine count across the returned groups.
HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingWhenRequestingFewerGroupsThenExpectedGroupsAreReturned, IsXeHpcCore) {
    uint32_t count = 0;
    ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups);
    // Exercise the partial-query path.
    count--;
    std::vector<ze_command_queue_group_properties_t> properties(count);
    // Bug fix: the return value was previously discarded, so the EXPECT below
    // re-checked the stale result of the first call instead of this one.
    res = deviceImp->getCommandQueueGroupProperties(&count, properties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    uint32_t numCopyQueues = 0;
    for (uint32_t i = 0; i < count; i++) {
        if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
            EXPECT_EQ(properties[i].numQueues, 1u);
        } else if ((properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) &&
                   !(properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) {
            numCopyQueues += properties[i].numQueues;
        }
    }
    // One group was dropped, so at most the full copy-engine count is seen.
    EXPECT_LE(numCopyQueues, expectedCopyEngineCount);
}
// Requesting exactly one queue group must clamp the output to one entry.
HWTEST2_F(MultiDeviceCommandQueueGroupWithNineCopyEnginesTest, givenMainAndLinkCopyEngineSupportAndCCSAndImplicitScalingWhenRequestingOnlyOneGroupThenOneQueueGroupIsReturned, IsXeHpcCore) {
uint32_t count = 0;
ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(count, numEngineGroups + subDeviceNumEngineGroups);
// Clamp the request to a single group.
count = 1;
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp->getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(count, 1u);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
for (uint32_t i = 0; i < count; i++) {
if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
EXPECT_EQ(properties[i].numQueues, expectedComputeEngineCount);
}
}
}
using MultiDeviceCommandQueueGroupWithNoCopyEnginesTest = Test<SingleRootMultiSubDeviceFixtureWithImplicitScaling<0, 1>>;
// With zero copy engines, no sub-device copy groups are imported, so only the
// root device's regular engine groups are reported.
HWTEST2_F(MultiDeviceCommandQueueGroupWithNoCopyEnginesTest,
givenNoCopyEngineSupportAndCCSAndImplicitScalingThenExpectedQueueGroupsAreReturned, IsXeHpcCore) {
uint32_t count = 0;
ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(count, numEngineGroups);
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp->getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
for (uint32_t i = 0; i < count; i++) {
if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
EXPECT_EQ(properties[i].numQueues, expectedComputeEngineCount);
}
}
}
using MultiDeviceCommandQueueGroupWithNoCopyEnginesAndNoImplicitScalingTest = Test<SingleRootMultiSubDeviceFixtureWithImplicitScaling<0, 0>>;
// Without implicit scaling, no sub-device copy groups are imported into the
// root device, so only the regular engine groups are reported.
// NOTE(review): the test name says "FromSubDevice" but the assertion checks
// numEngineGroups (regular groups only) — the name looks inverted; verify.
HWTEST2_F(MultiDeviceCommandQueueGroupWithNoCopyEnginesAndNoImplicitScalingTest,
givenNoCopyEngineSupportAndCCSAndNoImplicitScalingThenOnlyTheQueueGroupsFromSubDeviceAreReturned, IsXeHpcCore) {
uint32_t count = 0;
ze_result_t res = deviceImp->getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(count, numEngineGroups);
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp->getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
for (uint32_t i = 0; i < count; i++) {
if (properties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
EXPECT_EQ(properties[i].numQueues, expectedComputeEngineCount);
}
}
}
using CommandQueueGroupTest = Test<DeviceFixture>;
HWTEST2_F(CommandQueueGroupTest, givenNoBlitterSupportAndNoCCSThenOneQueueGroupIsReturned, IsXeHpcCore) {

View File

@ -36,7 +36,12 @@ When doing allocations in implicit scaling mode, driver *colors* an allocation a
When scheduling a kernel for execution, driver distributes the kernel workgroups among the available tiles. Default mechanism is called *Static Partitioning*, where the workgroups are evenly distributed among tiles. For instance, in a 2-tile system, half of the workgroups go to tile 0, and the other half to tile 1.
The number of CCSs, or compute engines, currently available with implicit scaling on the root device is one. This is because with implicit scaling the driver automatically uses all the EUs available in the device, so no other CCSs are exposed. Even though only one CCS is exposed, multiple kernels submitted to the root device using implicit scaling may execute concurrently on PVC, depending on EU availability. On XeHP_SDV, they may be serialized. See [Limitations](#Limitations) section below.
No implicit scaling support is available for BCSs. Considering that, two models are followed in terms of discovery of copy engines:
* In Level Zero, the copy engines from sub-device 0 are also exposed in the root device. This is to align the engine model on both the implicit and the non-implicit-scaling scenarios.
* In OpenCL, copy engines are not exposed in the root device.
Since implicit scaling is only done for EUs, which are associated only with kernels submitted to CCS, submissions to BCS copy engines are not implicitly scaled; in OpenCL, access to copy engines is therefore done through sub-device handles.