Add ze_eu_count_ext_t to get total number of EUs

Related-To: LOCI-2667

Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
Jaime Arteaga
2022-01-31 03:04:54 +00:00
committed by Compute-Runtime-Automation
parent b1731626d0
commit 1a08240474
4 changed files with 94 additions and 0 deletions

View File

@ -749,6 +749,21 @@ ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties)
}
}
if (NEO::DebugManager.flags.EnableL0EuCount.get()) {
if (pDeviceProperties->pNext) {
ze_base_desc_t *extendedDesc = reinterpret_cast<ze_base_desc_t *>(pDeviceProperties->pNext);
if (extendedDesc->stype == ZE_STRUCTURE_TYPE_EU_COUNT_EXT) {
ze_eu_count_ext_t *zeEuCountDesc = reinterpret_cast<ze_eu_count_ext_t *>(extendedDesc);
uint32_t numTotalEUs = hardwareInfo.gtSystemInfo.MaxEuPerSubSlice * hardwareInfo.gtSystemInfo.SubSliceCount * hardwareInfo.gtSystemInfo.SliceCount;
if (isImplicitScalingCapable()) {
numTotalEUs *= neoDevice->getNumGenericSubDevices();
}
zeEuCountDesc->numTotalEUs = numTotalEUs;
}
}
}
return ZE_RESULT_SUCCESS;
}

View File

@ -1078,6 +1078,60 @@ TEST_F(DeviceTest, givenDevicePropertiesStructureWhenDebugVariableOverrideDevice
EXPECT_STREQ(deviceProperties.name, testDeviceName.c_str());
}
// With the EnableL0EuCount debug key set, chaining a ze_eu_count_ext_t descriptor to the
// device properties query must report MaxEuPerSubSlice * SubSliceCount * SliceCount.
TEST_F(DeviceTest, WhenRequestingZeEuCountThenExpectedEUsAreReturned) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableL0EuCount.set(true);

    const uint32_t euPerSubSlice = 48;
    const uint32_t subSlices = 8;
    const uint32_t slices = 1;

    // Override the topology that getProperties() reads so the expected value is known.
    auto &gtSystemInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo;
    gtSystemInfo.MaxEuPerSubSlice = euPerSubSlice;
    gtSystemInfo.SubSliceCount = subSlices;
    gtSystemInfo.SliceCount = slices;

    ze_eu_count_ext_t euCountExt = {ZE_STRUCTURE_TYPE_EU_COUNT_EXT};
    ze_device_properties_t properties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    properties.pNext = &euCountExt;

    device->getProperties(&properties);

    const uint32_t expectedTotal = euPerSubSlice * subSlices * slices;
    EXPECT_EQ(expectedTotal, euCountExt.numTotalEUs);
}
// With the EnableL0EuCount debug key cleared, a chained ze_eu_count_ext_t descriptor must be
// left untouched: the sentinel written before the query has to survive it.
TEST_F(DeviceTest, WhenRequestingZeEuCountWithoutDebugKeyThenNoEusAreReturned) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableL0EuCount.set(false);

    const uint32_t euPerSubSlice = 48;
    const uint32_t subSlices = 8;
    const uint32_t slices = 1;
    const uint32_t sentinel = std::numeric_limits<uint32_t>::max();

    // Give the device a known topology so a (wrongly) computed count would differ from the sentinel.
    auto &gtSystemInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo;
    gtSystemInfo.MaxEuPerSubSlice = euPerSubSlice;
    gtSystemInfo.SubSliceCount = subSlices;
    gtSystemInfo.SliceCount = slices;

    ze_eu_count_ext_t euCountExt = {ZE_STRUCTURE_TYPE_EU_COUNT_EXT};
    euCountExt.numTotalEUs = sentinel;
    ze_device_properties_t properties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    properties.pNext = &euCountExt;

    device->getProperties(&properties);

    const uint32_t computedTotal = euPerSubSlice * subSlices * slices;
    EXPECT_NE(computedTotal, euCountExt.numTotalEUs);
    EXPECT_EQ(sentinel, euCountExt.numTotalEUs);
}
// Verifies that getProperties() leaves a chained descriptor untouched when its stype is not
// ZE_STRUCTURE_TYPE_EU_COUNT_EXT, even though the EU-count extension itself is enabled.
TEST_F(DeviceTest, WhenRequestingZeEuCountWithIncorrectStypeThenPNextIsIgnored) {
    // The extension is off by default; without enabling it the stype check is never reached and
    // this test would pass regardless of how the stype is handled.
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableL0EuCount.set(true);

    // Use a known, non-zero topology so an accidental write would be distinguishable from the
    // zero-initialized field checked below.
    auto &gtSystemInfo = device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo;
    gtSystemInfo.MaxEuPerSubSlice = 48;
    gtSystemInfo.SubSliceCount = 8;
    gtSystemInfo.SliceCount = 1;

    ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    // Deliberately mislabelled descriptor; numTotalEUs is zero-initialized by the aggregate initializer.
    ze_eu_count_ext_t zeEuCountDesc = {ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES};
    deviceProperties.pNext = &zeEuCountDesc;

    device->getProperties(&deviceProperties);

    EXPECT_EQ(0u, zeEuCountDesc.numTotalEUs);
}
TEST_F(DeviceTest, WhenGettingDevicePropertiesThenSubslicesPerSliceIsBasedOnSubslicesSupported) {
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
deviceProperties.type = ZE_DEVICE_TYPE_GPU;
@ -1935,6 +1989,29 @@ TEST_F(MultipleDevicesDisabledImplicitScalingTest, whenCallingGetMemoryPropertie
EXPECT_EQ(memProperties.totalSize, device0->getNEODevice()->getDeviceInfo().globalMemSize / numSubDevices);
}
// With implicit scaling enabled, the EU count reported through ze_eu_count_ext_t must be the
// per-device EU count multiplied by the fixture's number of sub-devices.
TEST_F(MultipleDevicesEnabledImplicitScalingTest, WhenRequestingZeEuCountThenExpectedEUsAreReturned) {
    DebugManagerStateRestore restore;
    DebugManager.flags.EnableL0EuCount.set(true);

    const uint32_t euPerSubSlice = 48;
    const uint32_t subSlices = 8;
    const uint32_t slices = 1;

    L0::Device *rootDevice = driverHandle->devices[0];

    // Override the topology that getProperties() reads so the expected value is known.
    auto &gtSystemInfo = rootDevice->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo()->gtSystemInfo;
    gtSystemInfo.MaxEuPerSubSlice = euPerSubSlice;
    gtSystemInfo.SubSliceCount = subSlices;
    gtSystemInfo.SliceCount = slices;

    ze_eu_count_ext_t euCountExt = {ZE_STRUCTURE_TYPE_EU_COUNT_EXT};
    ze_device_properties_t properties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
    properties.pNext = &euCountExt;

    rootDevice->getProperties(&properties);

    const uint32_t perDeviceTotal = euPerSubSlice * subSlices * slices;
    EXPECT_EQ(perDeviceTotal * numSubDevices, euCountExt.numTotalEUs);
}
TEST_F(MultipleDevicesEnabledImplicitScalingTest, whenCallingGetMemoryPropertiesWithSubDevicesThenCorrectSizeReturned) {
L0::Device *device0 = driverHandle->devices[0];
uint32_t count = 1;

View File

@ -333,6 +333,7 @@ DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address
/*FEATURE FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, EnableL0ReadLUIDExtension, false, "Enables Support for L0 Extension for reading the LUID from WDDM.")
DECLARE_DEBUG_VARIABLE(bool, EnableL0EuCount, false, "Enables Support for L0 Extension for querying total number of EUs.")
DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, false, "Evict USM allocation after implicit migration to GPU")
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")
DECLARE_DEBUG_VARIABLE(bool, EnablePackedYuv, true, "Enables cl_packed_yuv extension")

View File

@ -105,6 +105,7 @@ DirectSubmissionDisableMonitorFence = -1
DirectSubmissionPrintBuffers = 0
DirectSubmissionMaxRingBuffers = -1
EnableL0ReadLUIDExtension = 0
EnableL0EuCount = 0
USMEvictAfterMigration = 0
EnableDirectSubmissionController = -1
DirectSubmissionControllerTimeout = -1