diff --git a/level_zero/api/core/ze_core_loader.cpp b/level_zero/api/core/ze_core_loader.cpp
index 36e9086c41..f6aae45274 100644
--- a/level_zero/api/core/ze_core_loader.cpp
+++ b/level_zero/api/core/ze_core_loader.cpp
@@ -238,6 +238,7 @@ zeGetDeviceProcAddrTable(
     fillDdiEntry(pDdiTable->pfnGetRootDevice, L0::globalDriverDispatch.coreDevice.pfnGetRootDevice, version, ZE_API_VERSION_1_7);
     fillDdiEntry(pDdiTable->pfnImportExternalSemaphoreExt, L0::globalDriverDispatch.coreDevice.pfnImportExternalSemaphoreExt, version, ZE_API_VERSION_1_12);
     fillDdiEntry(pDdiTable->pfnReleaseExternalSemaphoreExt, L0::globalDriverDispatch.coreDevice.pfnReleaseExternalSemaphoreExt, version, ZE_API_VERSION_1_12);
+    fillDdiEntry(pDdiTable->pfnGetVectorWidthPropertiesExt, L0::globalDriverDispatch.coreDevice.pfnGetVectorWidthPropertiesExt, version, ZE_API_VERSION_1_13); // NOTE(review): last argument is presumably the first API version exposing this entry - confirm against fillDdiEntry
     driverDdiTable.coreDdiTable.Device = *pDdiTable;
     if (driverDdiTable.enableTracing) {
         fillDdiEntry(pDdiTable->pfnGet, zeDeviceGetTracing, version, ZE_API_VERSION_1_0);
diff --git a/level_zero/api/core/ze_device_api_entrypoints.h b/level_zero/api/core/ze_device_api_entrypoints.h
index ee214f8e1c..4d3e356a47 100644
--- a/level_zero/api/core/ze_device_api_entrypoints.h
+++ b/level_zero/api/core/ze_device_api_entrypoints.h
@@ -152,6 +152,13 @@ ze_result_t zeDeviceReleaseExternalSemaphoreExt(
     return L0::ExternalSemaphoreImp::fromHandle(hSemaphore)->releaseExternalSemaphore();
 }
 
+ze_result_t zeDeviceGetVectorWidthPropertiesExt( // driver-side entry point for the vector-width properties query
+    ze_device_handle_t hDevice,
+    uint32_t *pCount,
+    ze_device_vector_width_properties_ext_t *pVectorWidthProperties) {
+    return L0::Device::fromHandle(hDevice)->getVectorWidthPropertiesExt(pCount, pVectorWidthProperties); // resolve the handle to its Device and forward both arguments unchanged
+}
+
 uint32_t zerDeviceTranslateToIdentifier(ze_device_handle_t device) {
     if (!device) {
         auto driverHandle = static_cast(L0::globalDriverHandles->front());
@@ -350,6 +357,13 @@ ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceReleaseExternalSemaphoreExt(
     return
L0::ExternalSemaphoreImp::fromHandle(hSemaphore)->releaseExternalSemaphore(); } +ZE_APIEXPORT ze_result_t ZE_APICALL zeDeviceGetVectorWidthPropertiesExt( + ze_device_handle_t hDevice, + uint32_t *pCount, + ze_device_vector_width_properties_ext_t *pVectorWidthProperties) { + return L0::zeDeviceGetVectorWidthPropertiesExt(hDevice, pCount, pVectorWidthProperties); +} + uint32_t ZE_APICALL zerDeviceTranslateToIdentifier(ze_device_handle_t device) { return L0::zerDeviceTranslateToIdentifier(device); } diff --git a/level_zero/core/source/device/device.h b/level_zero/core/source/device/device.h index 25291cda89..3a81f7587b 100644 --- a/level_zero/core/source/device/device.h +++ b/level_zero/core/source/device/device.h @@ -76,6 +76,7 @@ struct Device : _ze_device_handle_t { virtual ze_result_t getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) = 0; virtual ze_result_t getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) = 0; virtual ze_result_t getProperties(ze_device_properties_t *pDeviceProperties) = 0; + virtual ze_result_t getVectorWidthPropertiesExt(uint32_t *pCount, ze_device_vector_width_properties_ext_t *pVectorWidthProperties) = 0; virtual ze_result_t getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) = 0; virtual ze_result_t getCacheProperties(uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) = 0; virtual ze_result_t getStatus() = 0; diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 1a4e890bc3..846e3af7ce 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1000,6 +1000,36 @@ ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKerne return ZE_RESULT_SUCCESS; } +ze_result_t DeviceImp::getVectorWidthPropertiesExt(uint32_t *pCount, ze_device_vector_width_properties_ext_t *pVectorWidthProperties) { + if (*pCount == 0) { + *pCount 
= 1; + return ZE_RESULT_SUCCESS; + } + if (pVectorWidthProperties == nullptr) { + return ZE_RESULT_ERROR_INVALID_ARGUMENT; + } + if (*pCount > 1) { + *pCount = 1; + } + auto &gfxCoreHelper = this->neoDevice->getGfxCoreHelper(); + auto vectorWidthSize = gfxCoreHelper.getMinimalSIMDSize(); + pVectorWidthProperties[0].vector_width_size = vectorWidthSize; + pVectorWidthProperties[0].preferred_vector_width_char = gfxCoreHelper.getPreferredVectorWidthChar(vectorWidthSize); + pVectorWidthProperties[0].preferred_vector_width_short = gfxCoreHelper.getPreferredVectorWidthShort(vectorWidthSize); + pVectorWidthProperties[0].preferred_vector_width_int = gfxCoreHelper.getPreferredVectorWidthInt(vectorWidthSize); + pVectorWidthProperties[0].preferred_vector_width_long = gfxCoreHelper.getPreferredVectorWidthLong(vectorWidthSize); + pVectorWidthProperties[0].preferred_vector_width_float = gfxCoreHelper.getPreferredVectorWidthFloat(vectorWidthSize); + pVectorWidthProperties[0].preferred_vector_width_half = gfxCoreHelper.getPreferredVectorWidthHalf(vectorWidthSize); + pVectorWidthProperties[0].native_vector_width_char = gfxCoreHelper.getNativeVectorWidthChar(vectorWidthSize); + pVectorWidthProperties[0].native_vector_width_short = gfxCoreHelper.getNativeVectorWidthShort(vectorWidthSize); + pVectorWidthProperties[0].native_vector_width_int = gfxCoreHelper.getNativeVectorWidthInt(vectorWidthSize); + pVectorWidthProperties[0].native_vector_width_long = gfxCoreHelper.getNativeVectorWidthLong(vectorWidthSize); + pVectorWidthProperties[0].native_vector_width_float = gfxCoreHelper.getNativeVectorWidthFloat(vectorWidthSize); + pVectorWidthProperties[0].native_vector_width_half = gfxCoreHelper.getNativeVectorWidthHalf(vectorWidthSize); + + return ZE_RESULT_SUCCESS; +} + ze_result_t DeviceImp::getProperties(ze_device_properties_t *pDeviceProperties) { const auto &deviceInfo = this->neoDevice->getDeviceInfo(); const auto &hardwareInfo = this->neoDevice->getHardwareInfo(); diff --git 
a/level_zero/core/source/device/device_imp.h b/level_zero/core/source/device/device_imp.h index 4bab954c54..d26031a0f9 100644 --- a/level_zero/core/source/device/device_imp.h +++ b/level_zero/core/source/device/device_imp.h @@ -59,6 +59,7 @@ struct DeviceImp : public Device, NEO::NonCopyableAndNonMovableClass { ze_result_t getMemoryProperties(uint32_t *pCount, ze_device_memory_properties_t *pMemProperties) override; ze_result_t getMemoryAccessProperties(ze_device_memory_access_properties_t *pMemAccessProperties) override; ze_result_t getProperties(ze_device_properties_t *pDeviceProperties) override; + ze_result_t getVectorWidthPropertiesExt(uint32_t *pCount, ze_device_vector_width_properties_ext_t *pVectorWidthProperties) override; ze_result_t getSubDevices(uint32_t *pCount, ze_device_handle_t *phSubdevices) override; ze_result_t getCacheProperties(uint32_t *pCount, ze_device_cache_properties_t *pCacheProperties) override; ze_result_t reserveCache(size_t cacheLevel, size_t cacheReservationSize) override; diff --git a/level_zero/core/source/driver/driver_handle_imp_helper.cpp b/level_zero/core/source/driver/driver_handle_imp_helper.cpp index 44917b6fe6..cc05eda65b 100644 --- a/level_zero/core/source/driver/driver_handle_imp_helper.cpp +++ b/level_zero/core/source/driver/driver_handle_imp_helper.cpp @@ -39,6 +39,7 @@ const std::vector> DriverHandleImp::extensionsS {ZE_GET_KERNEL_BINARY_EXP_NAME, ZE_KERNEL_GET_BINARY_EXP_VERSION_1_0}, {ZE_EXTERNAL_SEMAPHORES_EXTENSION_NAME, ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_1_0}, {ZE_CACHELINE_SIZE_EXT_NAME, ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_1_0}, + {ZE_DEVICE_VECTOR_SIZES_EXT_NAME, ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_1_0}, // Driver experimental extensions {ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_NAME, ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_VERSION_CURRENT}, diff --git a/level_zero/core/test/unit_tests/mocks/mock_device.h b/level_zero/core/test/unit_tests/mocks/mock_device.h index f833163a5d..9d45dafe42 100644 --- 
a/level_zero/core/test/unit_tests/mocks/mock_device.h +++ b/level_zero/core/test/unit_tests/mocks/mock_device.h @@ -37,6 +37,7 @@ struct MockDevice : public Device { ADDMETHOD_NOBASE(getP2PProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hPeerDevice, ze_device_p2p_properties_t *pP2PProperties)); ADDMETHOD_NOBASE(getKernelProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_module_properties_t * pKernelProperties)); ADDMETHOD_NOBASE(getPciProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_pci_ext_properties_t * pPciProperties)); + ADDMETHOD_NOBASE(getVectorWidthPropertiesExt, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, ze_device_vector_width_properties_ext_t *pVectorWidthProperties)); ADDMETHOD_NOBASE(getMemoryProperties, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t * pCount, ze_device_memory_properties_t *pMemProperties)); ADDMETHOD_NOBASE(getMemoryAccessProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_memory_access_properties_t * pMemAccessProperties)); ADDMETHOD_NOBASE(getProperties, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_properties_t * pDeviceProperties)); diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index ed600e356c..baa9537bb9 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -1295,6 +1295,62 @@ TEST_F(DeviceTest, givenDevicePropertiesStructureWhenDevicePropertiesCalledThenA EXPECT_EQ(3u, deviceProperties.numSlices); } +TEST_F(DeviceTest, givenNullPointerWhenCallingGetVectorWidthPropertiesExtThenInvalidArgumentIsReturned) { + ze_device_vector_width_properties_ext_t *vectorWidthProperties = nullptr; + auto deviceImp = static_cast(device); + uint32_t pCount = 1; + ze_result_t result = deviceImp->getVectorWidthPropertiesExt(&pCount, vectorWidthProperties); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); +} + 
+TEST_F(DeviceTest, givenValidPointerWhenCallingGetVectorWidthPropertiesExtThenPropertiesAreSetCorrectly) { + ze_device_vector_width_properties_ext_t vectorWidthProperties = {}; + vectorWidthProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_VECTOR_WIDTH_PROPERTIES_EXT; + vectorWidthProperties.pNext = nullptr; + uint32_t pCount = 0; + + auto deviceImp = static_cast(device); + ze_result_t result = deviceImp->getVectorWidthPropertiesExt(&pCount, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(1u, pCount); + + pCount = 2; // Check that the pCount is updated to the correct limit. + result = deviceImp->getVectorWidthPropertiesExt(&pCount, &vectorWidthProperties); + EXPECT_EQ(1u, pCount); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(16u, vectorWidthProperties.preferred_vector_width_char); + EXPECT_EQ(8u, vectorWidthProperties.preferred_vector_width_short); + EXPECT_EQ(4u, vectorWidthProperties.preferred_vector_width_int); + EXPECT_EQ(1u, vectorWidthProperties.preferred_vector_width_long); + EXPECT_EQ(1u, vectorWidthProperties.preferred_vector_width_float); + EXPECT_EQ(8u, vectorWidthProperties.preferred_vector_width_half); + EXPECT_EQ(16u, vectorWidthProperties.native_vector_width_char); + EXPECT_EQ(8u, vectorWidthProperties.native_vector_width_short); + EXPECT_EQ(4u, vectorWidthProperties.native_vector_width_int); + EXPECT_EQ(1u, vectorWidthProperties.native_vector_width_long); + EXPECT_EQ(1u, vectorWidthProperties.native_vector_width_float); + EXPECT_EQ(8u, vectorWidthProperties.native_vector_width_half); + + // Check that using the pCount updated to the correct limit, that this still works. 
+ result = deviceImp->getVectorWidthPropertiesExt(&pCount, &vectorWidthProperties); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(16u, vectorWidthProperties.preferred_vector_width_char); + EXPECT_EQ(8u, vectorWidthProperties.preferred_vector_width_short); + EXPECT_EQ(4u, vectorWidthProperties.preferred_vector_width_int); + EXPECT_EQ(1u, vectorWidthProperties.preferred_vector_width_long); + EXPECT_EQ(1u, vectorWidthProperties.preferred_vector_width_float); + EXPECT_EQ(8u, vectorWidthProperties.preferred_vector_width_half); + EXPECT_EQ(16u, vectorWidthProperties.native_vector_width_char); + EXPECT_EQ(8u, vectorWidthProperties.native_vector_width_short); + EXPECT_EQ(4u, vectorWidthProperties.native_vector_width_int); + EXPECT_EQ(1u, vectorWidthProperties.native_vector_width_long); + EXPECT_EQ(1u, vectorWidthProperties.native_vector_width_float); + EXPECT_EQ(8u, vectorWidthProperties.native_vector_width_half); +} + TEST_F(DeviceTest, givenDevicePropertiesStructureWhenDriverInfoIsEmptyThenDeviceNameTheSameAsInDeviceInfo) { auto deviceImp = static_cast(device); ze_device_properties_t deviceProperties{}; diff --git a/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp b/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp index e17729fdc4..b38dbf80cb 100644 --- a/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp +++ b/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp @@ -1703,6 +1703,7 @@ TEST_F(DriverExtensionsTest, givenDriverHandleWhenAskingForExtensionsThenReturnC verifyExtensionDefinition(ZE_GET_KERNEL_BINARY_EXP_NAME, ZE_KERNEL_GET_BINARY_EXP_VERSION_1_0); verifyExtensionDefinition(ZE_EXTERNAL_SEMAPHORES_EXTENSION_NAME, ZE_EXTERNAL_SEMAPHORE_EXT_VERSION_1_0); verifyExtensionDefinition(ZE_CACHELINE_SIZE_EXT_NAME, ZE_DEVICE_CACHE_LINE_SIZE_EXT_VERSION_1_0); + verifyExtensionDefinition(ZE_DEVICE_VECTOR_SIZES_EXT_NAME, ZE_DEVICE_VECTOR_SIZES_EXT_VERSION_1_0); // Driver experimental extensions 
verifyExtensionDefinition(ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_NAME, ZE_INTEL_DEVICE_MODULE_DP_PROPERTIES_EXP_VERSION_CURRENT); diff --git a/level_zero/ddi/ze_ddi_tables.cpp b/level_zero/ddi/ze_ddi_tables.cpp index 8ad07881ea..d0cc5e0362 100644 --- a/level_zero/ddi/ze_ddi_tables.cpp +++ b/level_zero/ddi/ze_ddi_tables.cpp @@ -18,9 +18,11 @@ namespace L0 { DriverDispatch globalDriverDispatch; DriverDispatch::DriverDispatch() { - this->core.version = ZE_API_VERSION_1_12; + this->core.version = ZE_API_VERSION_1_13; this->core.RTASBuilderExp = &this->coreRTASBuilderExp; + this->core.RTASBuilder = &this->coreRTASBuilder; this->core.RTASParallelOperationExp = &this->coreRTASParallelOperationExp; + this->core.RTASParallelOperation = &this->coreRTASParallelOperation; this->core.Global = &this->coreGlobal; this->core.Driver = &this->coreDriver; this->core.DriverExp = &this->coreDriverExp; @@ -133,6 +135,7 @@ DriverDispatch::DriverDispatch() { this->coreDevice.pfnGetRootDevice = L0::zeDeviceGetRootDevice; this->coreDevice.pfnImportExternalSemaphoreExt = L0::zeDeviceImportExternalSemaphoreExt; this->coreDevice.pfnReleaseExternalSemaphoreExt = L0::zeDeviceReleaseExternalSemaphoreExt; + this->coreDevice.pfnGetVectorWidthPropertiesExt = L0::zeDeviceGetVectorWidthPropertiesExt; this->coreDeviceExp.pfnGetFabricVertexExp = L0::zeDeviceGetFabricVertexExp; this->coreContext.pfnCreate = L0::zeContextCreate; this->coreContext.pfnDestroy = L0::zeContextDestroy; diff --git a/level_zero/ddi/ze_ddi_tables.h b/level_zero/ddi/ze_ddi_tables.h index 922b0b75de..df18cd80b5 100644 --- a/level_zero/ddi/ze_ddi_tables.h +++ b/level_zero/ddi/ze_ddi_tables.h @@ -40,7 +40,9 @@ struct DriverDispatch { zes_dditable_driver_t sysman{}; ze_rtas_builder_exp_dditable_t coreRTASBuilderExp{}; + ze_rtas_builder_dditable_t coreRTASBuilder{}; ze_rtas_parallel_operation_exp_dditable_t coreRTASParallelOperationExp{}; + ze_rtas_parallel_operation_dditable_t coreRTASParallelOperation{}; ze_global_dditable_t 
coreGlobal{}; ze_driver_dditable_t coreDriver{}; ze_driver_exp_dditable_t coreDriverExp{}; diff --git a/opencl/source/cl_device/cl_device_caps.cpp b/opencl/source/cl_device/cl_device_caps.cpp index 66040d0915..05c34ca18b 100644 --- a/opencl/source/cl_device/cl_device_caps.cpp +++ b/opencl/source/cl_device/cl_device_caps.cpp @@ -149,11 +149,11 @@ void ClDevice::initializeCaps() { deviceInfo.independentForwardProgress = hwInfo.capabilityTable.supportsIndependentForwardProgress; deviceInfo.maxNumOfSubGroups = 0; - if (ocl21FeaturesEnabled) { + auto simdSizeUsed = debugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() + ? CommonConstants::maximalSimdSize + : gfxCoreHelper.getMinimalSIMDSize(); - auto simdSizeUsed = debugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() - ? CommonConstants::maximalSimdSize - : gfxCoreHelper.getMinimalSIMDSize(); + if (ocl21FeaturesEnabled) { // calculate a maximum number of subgroups in a workgroup (for the required SIMD size) deviceInfo.maxNumOfSubGroups = static_cast(sharedDeviceInfo.maxWorkGroupSize / simdSizeUsed); @@ -233,18 +233,18 @@ void ClDevice::initializeCaps() { deviceInfo.partitionAffinityDomain = 0; } deviceInfo.partitionType[0] = 0; - deviceInfo.preferredVectorWidthChar = 16; - deviceInfo.preferredVectorWidthShort = 8; - deviceInfo.preferredVectorWidthInt = 4; - deviceInfo.preferredVectorWidthLong = 1; - deviceInfo.preferredVectorWidthFloat = 1; - deviceInfo.preferredVectorWidthHalf = 8; - deviceInfo.nativeVectorWidthChar = 16; - deviceInfo.nativeVectorWidthShort = 8; - deviceInfo.nativeVectorWidthInt = 4; - deviceInfo.nativeVectorWidthLong = 1; - deviceInfo.nativeVectorWidthFloat = 1; - deviceInfo.nativeVectorWidthHalf = 8; + deviceInfo.preferredVectorWidthChar = gfxCoreHelper.getPreferredVectorWidthChar(simdSizeUsed); + deviceInfo.preferredVectorWidthShort = gfxCoreHelper.getPreferredVectorWidthShort(simdSizeUsed); + deviceInfo.preferredVectorWidthInt = 
gfxCoreHelper.getPreferredVectorWidthInt(simdSizeUsed); + deviceInfo.preferredVectorWidthLong = gfxCoreHelper.getPreferredVectorWidthLong(simdSizeUsed); + deviceInfo.preferredVectorWidthFloat = gfxCoreHelper.getPreferredVectorWidthFloat(simdSizeUsed); + deviceInfo.preferredVectorWidthHalf = gfxCoreHelper.getPreferredVectorWidthHalf(simdSizeUsed); + deviceInfo.nativeVectorWidthChar = gfxCoreHelper.getNativeVectorWidthChar(simdSizeUsed); + deviceInfo.nativeVectorWidthShort = gfxCoreHelper.getNativeVectorWidthShort(simdSizeUsed); + deviceInfo.nativeVectorWidthInt = gfxCoreHelper.getNativeVectorWidthInt(simdSizeUsed); + deviceInfo.nativeVectorWidthLong = gfxCoreHelper.getNativeVectorWidthLong(simdSizeUsed); + deviceInfo.nativeVectorWidthFloat = gfxCoreHelper.getNativeVectorWidthFloat(simdSizeUsed); + deviceInfo.nativeVectorWidthHalf = gfxCoreHelper.getNativeVectorWidthHalf(simdSizeUsed); deviceInfo.maxReadWriteImageArgs = hwInfo.capabilityTable.supportsImages ? 128 : 0; deviceInfo.executionCapabilities = CL_EXEC_KERNEL; diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h index 7635590ec9..acd61b83c2 100644 --- a/shared/source/helpers/gfx_core_helper.h +++ b/shared/source/helpers/gfx_core_helper.h @@ -105,6 +105,18 @@ class GfxCoreHelper { virtual bool isWaDisableRccRhwoOptimizationRequired() const = 0; virtual uint32_t getMinimalSIMDSize() const = 0; + virtual uint32_t getPreferredVectorWidthChar(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getPreferredVectorWidthShort(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getPreferredVectorWidthInt(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getPreferredVectorWidthLong(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getPreferredVectorWidthFloat(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getPreferredVectorWidthHalf(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getNativeVectorWidthChar(uint32_t vectorWidthSize) const = 0; + 
virtual uint32_t getNativeVectorWidthShort(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getNativeVectorWidthInt(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getNativeVectorWidthLong(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getNativeVectorWidthFloat(uint32_t vectorWidthSize) const = 0; + virtual uint32_t getNativeVectorWidthHalf(uint32_t vectorWidthSize) const = 0; virtual uint32_t getMinimalGrfSize() const = 0; virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo, const ProductHelper &productHelper) const = 0; virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const = 0; @@ -337,6 +349,19 @@ class GfxCoreHelperHw : public GfxCoreHelper { uint32_t getMinimalSIMDSize() const override; + uint32_t getPreferredVectorWidthChar(uint32_t vectorWidthSize) const override; + uint32_t getPreferredVectorWidthShort(uint32_t vectorWidthSize) const override; + uint32_t getPreferredVectorWidthInt(uint32_t vectorWidthSize) const override; + uint32_t getPreferredVectorWidthLong(uint32_t vectorWidthSize) const override; + uint32_t getPreferredVectorWidthFloat(uint32_t vectorWidthSize) const override; + uint32_t getPreferredVectorWidthHalf(uint32_t vectorWidthSize) const override; + uint32_t getNativeVectorWidthChar(uint32_t vectorWidthSize) const override; + uint32_t getNativeVectorWidthShort(uint32_t vectorWidthSize) const override; + uint32_t getNativeVectorWidthInt(uint32_t vectorWidthSize) const override; + uint32_t getNativeVectorWidthLong(uint32_t vectorWidthSize) const override; + uint32_t getNativeVectorWidthFloat(uint32_t vectorWidthSize) const override; + uint32_t getNativeVectorWidthHalf(uint32_t vectorWidthSize) const override; + uint32_t getMinimalGrfSize() const override; uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double resolution) const override; diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index 
03f0389fd3..7853548b99 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -463,6 +463,66 @@ inline uint32_t GfxCoreHelperHw::getMinimalSIMDSize() const { return 8u; } +template +uint32_t GfxCoreHelperHw::getPreferredVectorWidthChar(uint32_t vectorWidthSize) const { + return 16; +} + +template +uint32_t GfxCoreHelperHw::getPreferredVectorWidthShort(uint32_t vectorWidthSize) const { + return 8; +} + +template +uint32_t GfxCoreHelperHw::getPreferredVectorWidthInt(uint32_t vectorWidthSize) const { + return 4; +} + +template +uint32_t GfxCoreHelperHw::getPreferredVectorWidthLong(uint32_t vectorWidthSize) const { + return 1; +} + +template +uint32_t GfxCoreHelperHw::getPreferredVectorWidthFloat(uint32_t vectorWidthSize) const { + return 1; +} + +template +uint32_t GfxCoreHelperHw::getPreferredVectorWidthHalf(uint32_t vectorWidthSize) const { + return 8; +} + +template +uint32_t GfxCoreHelperHw::getNativeVectorWidthChar(uint32_t vectorWidthSize) const { + return 16; +} + +template +uint32_t GfxCoreHelperHw::getNativeVectorWidthShort(uint32_t vectorWidthSize) const { + return 8; +} + +template +uint32_t GfxCoreHelperHw::getNativeVectorWidthInt(uint32_t vectorWidthSize) const { + return 4; +} + +template +uint32_t GfxCoreHelperHw::getNativeVectorWidthLong(uint32_t vectorWidthSize) const { + return 1; +} + +template +uint32_t GfxCoreHelperHw::getNativeVectorWidthFloat(uint32_t vectorWidthSize) const { + return 1; +} + +template +uint32_t GfxCoreHelperHw::getNativeVectorWidthHalf(uint32_t vectorWidthSize) const { + return 8; +} + template std::unique_ptr GfxCoreHelperHw::createTimestampPacketAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memoryManager, size_t initialTagCount, CommandStreamReceiverType csrType, DeviceBitfield deviceBitfield) const { diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index 
f14969712f..d85d236f94 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -1061,6 +1061,26 @@ HWTEST_F(GfxCoreHelperTest, givenDefaultGfxCoreHelperHwWhenMinimalSIMDSizeIsQuer EXPECT_EQ(8u, gfxCoreHelper.getMinimalSIMDSize()); } +HWTEST_F(GfxCoreHelperTest, givenDefaultGfxCoreHelperHwWhenGettingPreferredVectorWidthsThenCorrectValuesAreReturned) { + const auto &gfxCoreHelper = getHelper(); + EXPECT_EQ(16u, gfxCoreHelper.getPreferredVectorWidthChar(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(8u, gfxCoreHelper.getPreferredVectorWidthShort(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(4u, gfxCoreHelper.getPreferredVectorWidthInt(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(1u, gfxCoreHelper.getPreferredVectorWidthLong(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(1u, gfxCoreHelper.getPreferredVectorWidthFloat(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(8u, gfxCoreHelper.getPreferredVectorWidthHalf(gfxCoreHelper.getMinimalSIMDSize())); +} + +HWTEST_F(GfxCoreHelperTest, givenDefaultGfxCoreHelperHwWhenGettingNativeVectorWidthsThenCorrectValuesAreReturned) { + const auto &gfxCoreHelper = getHelper(); + EXPECT_EQ(16u, gfxCoreHelper.getNativeVectorWidthChar(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(8u, gfxCoreHelper.getNativeVectorWidthShort(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(4u, gfxCoreHelper.getNativeVectorWidthInt(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(1u, gfxCoreHelper.getNativeVectorWidthLong(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(1u, gfxCoreHelper.getNativeVectorWidthFloat(gfxCoreHelper.getMinimalSIMDSize())); + EXPECT_EQ(8u, gfxCoreHelper.getNativeVectorWidthHalf(gfxCoreHelper.getMinimalSIMDSize())); +} + HWTEST_F(GfxCoreHelperTest, givenDefaultGfxCoreHelperHwWhenMinimalGrfSizeIsQueriedThen128IsReturned) { const auto &gfxCoreHelper = getHelper(); EXPECT_EQ(128u, gfxCoreHelper.getMinimalGrfSize());