diff --git a/shared/source/os_interface/linux/device_time_drm.cpp b/shared/source/os_interface/linux/device_time_drm.cpp index 43fbe59632..cd9dfc90ee 100644 --- a/shared/source/os_interface/linux/device_time_drm.cpp +++ b/shared/source/os_interface/linux/device_time_drm.cpp @@ -19,95 +19,10 @@ namespace NEO { DeviceTimeDrm::DeviceTimeDrm(OSInterface &osInterface) { pDrm = osInterface.getDriverModel()->as(); - timestampTypeDetect(); } -void DeviceTimeDrm::timestampTypeDetect() { - RegisterRead reg = {}; - int err; - - reg.offset = (REG_GLOBAL_TIMESTAMP_LDW | 1); - auto ioctlHelper = pDrm->getIoctlHelper(); - err = ioctlHelper->ioctl(DrmIoctl::RegRead, &reg); - if (err) { - reg.offset = REG_GLOBAL_TIMESTAMP_UN; - err = ioctlHelper->ioctl(DrmIoctl::RegRead, &reg); - if (err) { - getGpuTime = &DeviceTimeDrm::getGpuTime32; - } else { - getGpuTime = &DeviceTimeDrm::getGpuTimeSplitted; - } - } else { - getGpuTime = &DeviceTimeDrm::getGpuTime36; - } -} - -bool DeviceTimeDrm::getGpuTime32(uint64_t *timestamp) { - RegisterRead reg = {}; - - reg.offset = REG_GLOBAL_TIMESTAMP_LDW; - - auto ioctlHelper = pDrm->getIoctlHelper(); - if (ioctlHelper->ioctl(DrmIoctl::RegRead, &reg)) { - return false; - } - *timestamp = reg.value >> 32; - return true; -} - -bool DeviceTimeDrm::getGpuTime36(uint64_t *timestamp) { - RegisterRead reg = {}; - - reg.offset = REG_GLOBAL_TIMESTAMP_LDW | 1; - - auto ioctlHelper = pDrm->getIoctlHelper(); - if (ioctlHelper->ioctl(DrmIoctl::RegRead, &reg)) { - return false; - } - *timestamp = reg.value; - return true; -} - -bool DeviceTimeDrm::getGpuTimeSplitted(uint64_t *timestamp) { - RegisterRead regHi = {}; - RegisterRead regLo = {}; - uint64_t tmpHi; - int err = 0, loop = 3; - - regHi.offset = REG_GLOBAL_TIMESTAMP_UN; - regLo.offset = REG_GLOBAL_TIMESTAMP_LDW; - - auto ioctlHelper = pDrm->getIoctlHelper(); - err += ioctlHelper->ioctl(DrmIoctl::RegRead, &regHi); - do { - tmpHi = regHi.value; - err += ioctlHelper->ioctl(DrmIoctl::RegRead, &regLo); - err += 
ioctlHelper->ioctl(DrmIoctl::RegRead, &regHi); - } while (err == 0 && regHi.value != tmpHi && --loop); - - if (err) { - return false; - } - - *timestamp = regLo.value | (regHi.value << 32); - return true; -} - -std::optional initialGpuTimeStamp{}; -bool waitingForGpuTimeStampOverflow = false; -uint64_t gpuTimeStampOverflowCounter = 0; - bool DeviceTimeDrm::getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) { - if (nullptr == this->getGpuTime) { - return false; - } - if (!(this->*getGpuTime)(&pGpuCpuTime->gpuTimeStamp)) { - return false; - } - if (!osTime->getCpuTime(&pGpuCpuTime->cpuTimeinNS)) { - return false; - } - return true; + return pDrm->getIoctlHelper()->setGpuCpuTimes(pGpuCpuTime, osTime); } double DeviceTimeDrm::getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const { @@ -116,13 +31,14 @@ double DeviceTimeDrm::getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo auto error = pDrm->getTimestampFrequency(frequency); if (!error) { - return 1000000000.0 / frequency; + return nanosecondsPerSecond / frequency; } } return OSTime::getDeviceTimerResolution(hwInfo); } uint64_t DeviceTimeDrm::getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const { + if (pDrm) { int frequency = 0; @@ -131,7 +47,7 @@ uint64_t DeviceTimeDrm::getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) c return static_cast(frequency); } } - return static_cast(1000000000.0 / OSTime::getDeviceTimerResolution(hwInfo)); + return static_cast(nanosecondsPerSecond / OSTime::getDeviceTimerResolution(hwInfo)); } -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/source/os_interface/linux/device_time_drm.h b/shared/source/os_interface/linux/device_time_drm.h index a828a5936f..d29e1b8bd5 100644 --- a/shared/source/os_interface/linux/device_time_drm.h +++ b/shared/source/os_interface/linux/device_time_drm.h @@ -15,17 +15,13 @@ class DeviceTimeDrm : public DeviceTime { public: DeviceTimeDrm(OSInterface &osInterface); bool 
getGpuCpuTimeImpl(TimeStampData *pGpuCpuTime, OSTime *osTime) override; - typedef bool (DeviceTimeDrm::*TimestampFunction)(uint64_t *); - void timestampTypeDetect(); - TimestampFunction getGpuTime = nullptr; - bool getGpuTime32(uint64_t *timestamp); - bool getGpuTime36(uint64_t *timestamp); - bool getGpuTimeSplitted(uint64_t *timestamp); double getDynamicDeviceTimerResolution(HardwareInfo const &hwInfo) const override; uint64_t getDynamicDeviceTimerClock(HardwareInfo const &hwInfo) const override; protected: Drm *pDrm = nullptr; + + static constexpr double nanosecondsPerSecond = 1000000000.0; }; } // namespace NEO diff --git a/shared/source/os_interface/linux/ioctl_helper.cpp b/shared/source/os_interface/linux/ioctl_helper.cpp index 0c99441cf4..4f2ba08b91 100644 --- a/shared/source/os_interface/linux/ioctl_helper.cpp +++ b/shared/source/os_interface/linux/ioctl_helper.cpp @@ -14,12 +14,14 @@ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/ptr_math.h" +#include "shared/source/helpers/register_offsets.h" #include "shared/source/os_interface/linux/drm_neo.h" #include "shared/source/os_interface/linux/drm_wrappers.h" #include "shared/source/os_interface/linux/engine_info.h" #include "shared/source/os_interface/linux/i915.h" #include "shared/source/os_interface/linux/memory_info.h" #include "shared/source/os_interface/linux/os_context_linux.h" +#include "shared/source/os_interface/os_time.h" #include #include @@ -574,4 +576,89 @@ bool IoctlHelper::setGemTiling(void *setTiling) { bool IoctlHelper::getGemTiling(void *setTiling) { return this->ioctl(DrmIoctl::GemGetTiling, setTiling) == 0; } + +bool getGpuTime32(::NEO::Drm &drm, uint64_t *timestamp) { + RegisterRead reg = {}; + reg.offset = REG_GLOBAL_TIMESTAMP_LDW; + + if (drm.ioctl(DrmIoctl::RegRead, &reg)) { + return false; + } + *timestamp = reg.value >> 32; + return true; +} + +bool getGpuTime36(::NEO::Drm &drm, uint64_t *timestamp) { + RegisterRead 
reg = {}; + reg.offset = REG_GLOBAL_TIMESTAMP_LDW | 1; + + if (drm.ioctl(DrmIoctl::RegRead, &reg)) { + return false; + } + *timestamp = reg.value; + return true; +} + +bool getGpuTimeSplitted(::NEO::Drm &drm, uint64_t *timestamp) { + RegisterRead regHi = {}; + RegisterRead regLo = {}; + uint64_t tmpHi; + int err = 0, loop = 3; + + regHi.offset = REG_GLOBAL_TIMESTAMP_UN; + regLo.offset = REG_GLOBAL_TIMESTAMP_LDW; + + err += drm.ioctl(DrmIoctl::RegRead, &regHi); + do { + tmpHi = regHi.value; + err += drm.ioctl(DrmIoctl::RegRead, &regLo); + err += drm.ioctl(DrmIoctl::RegRead, &regHi); + } while (err == 0 && regHi.value != tmpHi && --loop); + + if (err) { + return false; + } + + *timestamp = regLo.value | (regHi.value << 32); + return true; +} + +void IoctlHelper::initializeGetGpuTimeFunction() { + RegisterRead reg = {}; + int err; + + reg.offset = (REG_GLOBAL_TIMESTAMP_LDW | 1); + err = this->ioctl(DrmIoctl::RegRead, &reg); + if (err) { + reg.offset = REG_GLOBAL_TIMESTAMP_UN; + err = this->ioctl(DrmIoctl::RegRead, &reg); + if (err) { + this->getGpuTime = getGpuTime32; + } else { + this->getGpuTime = getGpuTimeSplitted; + } + } else { + this->getGpuTime = getGpuTime36; + } +} + +bool IoctlHelper::setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime) { + if (pGpuCpuTime == nullptr || osTime == nullptr) { + return false; + } + + if (!this->getGpuTime) { + return false; + } + + if (!this->getGpuTime(drm, &pGpuCpuTime->gpuTimeStamp)) { + return false; + } + if (!osTime->getCpuTime(&pGpuCpuTime->cpuTimeinNS)) { + return false; + } + + return true; +} + } // namespace NEO diff --git a/shared/source/os_interface/linux/ioctl_helper.h b/shared/source/os_interface/linux/ioctl_helper.h index bc102464e6..cd38b05286 100644 --- a/shared/source/os_interface/linux/ioctl_helper.h +++ b/shared/source/os_interface/linux/ioctl_helper.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,7 @@ namespace NEO { class Drm; class OsContextLinux; class IoctlHelper; +class 
OSTime; enum class CacheRegion : uint16_t; enum class PreferredLocation : int16_t; enum class AtomicAccessMode : uint32_t; @@ -31,6 +33,7 @@ struct HardwareInfo; struct HardwareIpVersion; struct EngineInfo; struct DrmQueryTopologyData; +struct TimeStampData; class MemoryInfo; @@ -163,6 +166,10 @@ class IoctlHelper { bool translateTopologyInfo(const QueryTopologyInfo *queryTopologyInfo, DrmQueryTopologyData &topologyData, TopologyMapping &mapping); virtual void fillBindInfoForIpcHandle(uint32_t handle, size_t size); + virtual void initializeGetGpuTimeFunction(); + virtual bool setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime); + std::function getGpuTime; + protected: Drm &drm; }; diff --git a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp index 7e23fd74f4..5ce37754a3 100644 --- a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp +++ b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp @@ -920,6 +920,7 @@ bool IoctlHelperPrelim20::queryHwIpVersion(EngineClassInstance &engineInfo, Hard } bool IoctlHelperPrelim20::initialize() { + initializeGetGpuTimeFunction(); return true; } diff --git a/shared/source/os_interface/linux/ioctl_helper_upstream.cpp b/shared/source/os_interface/linux/ioctl_helper_upstream.cpp index 25994809ee..d9cef02719 100644 --- a/shared/source/os_interface/linux/ioctl_helper_upstream.cpp +++ b/shared/source/os_interface/linux/ioctl_helper_upstream.cpp @@ -17,6 +17,7 @@ namespace NEO { bool IoctlHelperUpstream::initialize() { detectExtSetPatSupport(); + initializeGetGpuTimeFunction(); return true; } diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp b/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp index 93a9593637..38ce27de15 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp @@ -15,6 +15,7 @@ #include "shared/source/helpers/bit_helpers.h" #include 
"shared/source/helpers/common_types.h" #include "shared/source/helpers/constants.h" +#include "shared/source/helpers/engine_control.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/register_offsets.h" @@ -23,6 +24,7 @@ #include "shared/source/os_interface/linux/engine_info.h" #include "shared/source/os_interface/linux/memory_info.h" #include "shared/source/os_interface/linux/os_context_linux.h" +#include "shared/source/os_interface/os_time.h" #include "drm/i915_drm_prelim.h" #include "drm/xe_drm.h" @@ -83,22 +85,34 @@ const char *IoctlHelperXe::xeGetClassName(int className) { return "???"; } -const char *IoctlHelperXe::xeGetBindOpName(int bindOp) { - switch (bindOp) { +const char *IoctlHelperXe::xeGetBindOperationName(int bindOperation) { + switch (bindOperation) { case XE_VM_BIND_OP_MAP: return "MAP"; case XE_VM_BIND_OP_UNMAP: return "UNMAP"; case XE_VM_BIND_OP_MAP_USERPTR: return "MAP_USERPTR"; - case XE_VM_BIND_OP_MAP | XE_VM_BIND_FLAG_ASYNC: - return "AS_MAP"; - case XE_VM_BIND_OP_UNMAP | XE_VM_BIND_FLAG_ASYNC: - return "AS_UNMAP"; - case XE_VM_BIND_OP_MAP_USERPTR | XE_VM_BIND_FLAG_ASYNC: - return "AS_MAP_USERPTR"; + case XE_VM_BIND_OP_UNMAP_ALL: + return "UNMAP ALL"; + case XE_VM_BIND_OP_PREFETCH: + return "PREFETCH"; } - return "unknown_OP"; + return "Unknown operation"; +} + +const char *IoctlHelperXe::xeGetBindFlagsName(int bindFlags) { + switch (bindFlags) { + case XE_VM_BIND_FLAG_READONLY: + return "READ_ONLY"; + case XE_VM_BIND_FLAG_ASYNC: + return "ASYNC"; + case XE_VM_BIND_FLAG_IMMEDIATE: + return "IMMEDIATE"; + case XE_VM_BIND_FLAG_NULL: + return "NULL"; + } + return "Unknown flag"; } const char *IoctlHelperXe::xeGetengineClassName(uint32_t engineClass) { @@ -125,7 +139,7 @@ IoctlHelperXe::IoctlHelperXe(Drm &drmArg) : IoctlHelper(drmArg) { bool IoctlHelperXe::initialize() { xeLog("IoctlHelperXe::initialize\n", ""); - struct drm_xe_device_query queryConfig = {}; + 
drm_xe_device_query queryConfig = {}; queryConfig.query = DRM_XE_DEVICE_QUERY_CONFIG; auto retVal = IoctlHelper::ioctl(DrmIoctl::Query, &queryConfig); @@ -139,7 +153,7 @@ bool IoctlHelperXe::initialize() { xeLog("XE_QUERY_CONFIG_REV_AND_DEVICE_ID\t%#llx\n", config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID]); xeLog(" REV_ID\t\t\t\t%#llx\n", - config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16); + (config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16) & 0xff); xeLog(" DEVICE_ID\t\t\t\t%#llx\n", config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff); xeLog("XE_QUERY_CONFIG_FLAGS\t\t\t%#llx\n", @@ -159,7 +173,7 @@ bool IoctlHelperXe::initialize() { config->info[XE_QUERY_CONFIG_MEM_REGION_COUNT]); chipsetId = config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff; - revId = static_cast(config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16); + revId = static_cast((config->info[XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16) & 0xff); hasVram = config->info[XE_QUERY_CONFIG_FLAGS] & XE_QUERY_CONFIG_FLAGS_HAS_VRAM ? 
1 : 0; memset(&queryConfig, 0, sizeof(queryConfig)); @@ -173,6 +187,7 @@ bool IoctlHelperXe::initialize() { auto hwInfo = this->drm.getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.usDeviceID = chipsetId; hwInfo->platform.usRevId = revId; + return true; } @@ -253,6 +268,9 @@ std::unique_ptr IoctlHelperXe::createEngineInfo(bool isSysmanEnabled multiTileArchInfo.TileCount = multiTileMask.count(); multiTileArchInfo.TileMask = static_cast(multiTileMask.to_ulong()); } + + setDefaultEngine(); + return std::make_unique(&drm, enginesPerTile); } @@ -267,15 +285,15 @@ inline MemoryRegion createMemoryRegionFromXeMemRegion(const drm_xe_query_mem_reg std::unique_ptr IoctlHelperXe::createMemoryInfo() { auto memUsageData = queryData(DRM_XE_DEVICE_QUERY_MEM_USAGE); - auto gtsData = queryData(DRM_XE_DEVICE_QUERY_GTS); + auto gtListData = queryData(DRM_XE_DEVICE_QUERY_GT_LIST); - if (memUsageData.empty() || gtsData.empty()) { + if (memUsageData.empty() || gtListData.empty()) { return {}; } MemoryInfo::RegionContainer regionsContainer{}; auto xeMemUsageData = reinterpret_cast(memUsageData.data()); - auto xeGtsData = reinterpret_cast(gtsData.data()); + auto xeGtListData = reinterpret_cast(gtListData.data()); std::array memoryRegionInstances{}; @@ -291,19 +309,84 @@ std::unique_ptr IoctlHelperXe::createMemoryInfo() { return {}; } - for (auto i = 0u; i < xeGtsData->num_gt; i++) { - if (xeGtsData->gts[i].type != XE_QUERY_GT_TYPE_MEDIA) { - uint64_t nativeMemRegions = xeGtsData->gts[i].native_mem_regions; + for (auto i = 0u; i < xeGtListData->num_gt; i++) { + if (xeGtListData->gt_list[i].type != XE_QUERY_GT_TYPE_MEDIA) { + uint64_t nativeMemRegions = xeGtListData->gt_list[i].native_mem_regions; auto regionIndex = Math::log2(nativeMemRegions); UNRECOVERABLE_IF(!memoryRegionInstances[regionIndex]); regionsContainer.push_back(createMemoryRegionFromXeMemRegion(*memoryRegionInstances[regionIndex])); - - xeTimestampFrequency = xeGtsData->gts[i].clock_freq; + 
xeTimestampFrequency = xeGtListData->gt_list[i].clock_freq; } } return std::make_unique(regionsContainer, drm); } +bool IoctlHelperXe::setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime) { + if (pGpuCpuTime == nullptr || osTime == nullptr) { + return false; + } + + drm_xe_device_query deviceQuery = {}; + deviceQuery.query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES; + + auto ret = IoctlHelper::ioctl(DrmIoctl::Query, &deviceQuery); + + if (ret != 0) { + xeLog(" -> IoctlHelperXe::%s s=0x%lx r=%d\n", __FUNCTION__, deviceQuery.size, ret); + return false; + } + + std::vector retVal(deviceQuery.size); + deviceQuery.data = castToUint64(retVal.data()); + + drm_xe_query_engine_cycles *queryEngineCycles = reinterpret_cast(retVal.data()); + queryEngineCycles->clockid = CLOCK_MONOTONIC_RAW; + queryEngineCycles->eci = *this->defaultEngine; + + ret = IoctlHelper::ioctl(DrmIoctl::Query, &deviceQuery); + + auto nValidBits = queryEngineCycles->width; + auto gpuTimestampValidBits = maxNBitValue(nValidBits); + auto gpuCycles = queryEngineCycles->engine_cycles & gpuTimestampValidBits; + + xeLog(" -> IoctlHelperXe::%s [%d,%d] clockId=0x%x s=0x%lx nValidBits=0x%x gpuCycles=0x%x cpuTimeInNS=0x%x r=%d\n", __FUNCTION__, + queryEngineCycles->eci.engine_class, queryEngineCycles->eci.engine_instance, + queryEngineCycles->clockid, deviceQuery.size, nValidBits, gpuCycles, queryEngineCycles->cpu_timestamp, ret); + + pGpuCpuTime->gpuTimeStamp = gpuCycles; + pGpuCpuTime->cpuTimeinNS = queryEngineCycles->cpu_timestamp; + + return ret == 0; +} + +bool IoctlHelperXe::getTimestampFrequency(uint64_t &frequency) { + drm_xe_device_query deviceQuery = {}; + deviceQuery.query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES; + + auto ret = IoctlHelper::ioctl(DrmIoctl::Query, &deviceQuery); + + if (ret != 0) { + xeLog(" -> IoctlHelperXe::%s s=0x%lx r=%d\n", __FUNCTION__, deviceQuery.size, ret); + return false; + } + + std::vector retVal(deviceQuery.size); + deviceQuery.data = castToUint64(retVal.data()); + + 
drm_xe_query_engine_cycles *queryEngineCycles = reinterpret_cast(retVal.data()); + queryEngineCycles->clockid = CLOCK_MONOTONIC_RAW; + queryEngineCycles->eci = *defaultEngine; + + ret = IoctlHelper::ioctl(DrmIoctl::Query, &deviceQuery); + frequency = queryEngineCycles->engine_frequency; + + xeLog(" -> IoctlHelperXe::%s [%d,%d] clockId=0x%x s=0x%lx frequency=0x%x r=%d\n", __FUNCTION__, + queryEngineCycles->eci.engine_class, queryEngineCycles->eci.engine_instance, + queryEngineCycles->clockid, deviceQuery.size, frequency, ret); + + return ret == 0; +} + void IoctlHelperXe::getTopologyData(size_t nTiles, std::vector> *geomDss, std::vector> *computeDss, std::vector> *euDss, DrmQueryTopologyData &topologyData, bool &isComputeDssEmpty) { int subSliceCount = 0; @@ -383,13 +466,13 @@ bool IoctlHelperXe::getTopologyDataAndMap(const HardwareInfo &hwInfo, DrmQueryTo auto topologySize = queryGtTopology.size(); auto dataPtr = queryGtTopology.data(); - auto gtsData = queryData(DRM_XE_DEVICE_QUERY_GTS); - auto xeGtsData = reinterpret_cast(gtsData.data()); - gtIdToTile.resize(xeGtsData->num_gt, -1); + auto gtsData = queryData(DRM_XE_DEVICE_QUERY_GT_LIST); + auto xeGtListData = reinterpret_cast(gtsData.data()); + gtIdToTile.resize(xeGtListData->num_gt, -1); auto tileIndex = 0u; for (auto gt = 0u; gt < gtIdToTile.size(); gt++) { - if (xeGtsData->gts[gt].type != XE_QUERY_GT_TYPE_MEDIA) { + if (xeGtListData->gt_list[gt].type != XE_QUERY_GT_TYPE_MEDIA) { gtIdToTile[gt] = tileIndex++; } } @@ -403,7 +486,7 @@ bool IoctlHelperXe::getTopologyDataAndMap(const HardwareInfo &hwInfo, DrmQueryTo uint32_t gtId = topo->gt_id; - if (xeGtsData->gts[gtId].type != XE_QUERY_GT_TYPE_MEDIA) { + if (xeGtListData->gt_list[gtId].type != XE_QUERY_GT_TYPE_MEDIA) { switch (topo->type) { case XE_TOPO_DSS_GEOMETRY: fillMask(geomDss[gtIdToTile[gtId]], topo); @@ -440,6 +523,21 @@ void IoctlHelperXe::updateBindInfo(uint32_t handle, uint64_t userPtr, uint64_t s bindInfo.push_back(b); } +void 
IoctlHelperXe::setDefaultEngine() { + auto defaultEngineClass = DRM_XE_ENGINE_CLASS_COMPUTE; + + for (auto i = 0u; i < allEngines.size(); i++) { + if (allEngines[i].engine_class == defaultEngineClass) { + defaultEngine = xeFindMatchingEngine(defaultEngineClass, allEngines[i].engine_instance); + break; + } + } + + if (defaultEngine == nullptr) { + UNRECOVERABLE_IF(true); + } +} + int IoctlHelperXe::createGemExt(const MemRegionsVec &memClassInstances, size_t allocSize, uint32_t &handle, uint64_t patIndex, std::optional vmId, int32_t pairHandle, bool isChunked, uint32_t numOfChunks) { struct drm_xe_gem_create create = {}; uint32_t regionsSize = static_cast(memClassInstances.size()); @@ -814,8 +912,6 @@ unsigned int IoctlHelperXe::getIoctlRequestValue(DrmIoctl ioctlRequest) const { RETURN_ME(DRM_IOCTL_PRIME_FD_TO_HANDLE); case DrmIoctl::PrimeHandleToFd: RETURN_ME(DRM_IOCTL_PRIME_HANDLE_TO_FD); - case DrmIoctl::RegRead: - RETURN_ME(DRM_IOCTL_XE_MMIO); default: UNRECOVERABLE_IF(true); return 0; @@ -885,8 +981,6 @@ std::string IoctlHelperXe::getIoctlString(DrmIoctl ioctlRequest) const { STRINGIFY_ME(DRM_IOCTL_PRIME_FD_TO_HANDLE); case DrmIoctl::PrimeHandleToFd: STRINGIFY_ME(DRM_IOCTL_PRIME_HANDLE_TO_FD); - case DrmIoctl::RegRead: - STRINGIFY_ME(DRM_IOCTL_XE_MMIO); default: return "???"; } @@ -915,9 +1009,12 @@ int IoctlHelperXe::ioctl(DrmIoctl request, void *arg) { case static_cast(DrmParam::ParamHasScheduler): *d->value = static_cast(0x80000037); break; - case static_cast(DrmParam::ParamCsTimestampFrequency): - *d->value = static_cast(xeTimestampFrequency); - break; + case static_cast(DrmParam::ParamCsTimestampFrequency): { + uint64_t frequency = 0; + if (getTimestampFrequency(frequency)) { + *d->value = static_cast(frequency); + } + } break; default: ret = -1; } @@ -1045,23 +1142,10 @@ int IoctlHelperXe::ioctl(DrmIoctl request, void *arg) { } xeLog(" -> IoctlHelperXe::ioctl GemClose found=%d h=0x%x r=%d\n", found, d->handle, ret); } break; - case DrmIoctl::RegRead: { - 
struct drm_xe_mmio mmio = {}; - RegisterRead *reg = static_cast(arg); - mmio.addr = static_cast(reg->offset); - if (reg->offset == (REG_GLOBAL_TIMESTAMP_LDW | 1)) { - mmio.addr = REG_GLOBAL_TIMESTAMP_LDW; - } - mmio.flags = DRM_XE_MMIO_READ | DRM_XE_MMIO_64BIT; - ret = IoctlHelper::ioctl(request, &mmio); - reg->value = mmio.value; - xeLog(" -> IoctlHelperXe::ioctl RegRead 0x%lx/0x%lx r=%d\n", - reg->offset, reg->value, ret); - } break; case DrmIoctl::GemVmCreate: { GemVmControl *d = static_cast(arg); struct drm_xe_vm_create args = {}; - args.flags = DRM_XE_VM_CREATE_ASYNC_BIND_OPS | + args.flags = DRM_XE_VM_CREATE_ASYNC_DEFAULT | DRM_XE_VM_CREATE_COMPUTE_MODE; if (drm.hasPageFaultSupport()) { args.flags |= DRM_XE_VM_CREATE_FAULT_MODE; @@ -1139,10 +1223,10 @@ void IoctlHelperXe::xeShowBindTable() { } int IoctlHelperXe::createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex) { - struct drm_xe_exec_queue_create create = {}; + drm_xe_exec_queue_create create = {}; uint32_t drmContextId = 0; - struct drm_xe_engine_class_instance *currentEngine = nullptr; - std::vector engine; + drm_xe_engine_class_instance *currentEngine = nullptr; + std::vector engine; int requestClass = 0; xeLog("createDrmContext VM=0x%x\n", drmVmId); @@ -1193,12 +1277,7 @@ int IoctlHelperXe::createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_ } create.instances = castToUint64(engine.data()); create.num_placements = engine.size(); - struct drm_xe_ext_exec_queue_set_property ext = {}; - ext.base.name = XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY; - ext.property = XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE; - ext.value = 1; - create.extensions = castToUint64(&ext); int ret = IoctlHelper::ioctl(DrmIoctl::GemContextCreateExt, &create); drmContextId = create.exec_queue_id; xeLog("%s:%d (%d) vmid=0x%x ctx=0x%x r=0x%x\n", xeGetClassName(engine[0].engine_class), @@ -1209,80 +1288,86 @@ int IoctlHelperXe::createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_ return 
drmContextId; } -int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool bindOp) { +int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool isBind) { + constexpr int invalidIndex = -1; + auto gmmHelper = drm.getRootDeviceEnvironment().getGmmHelper(); int ret = -1; - const char *operation = "unbind"; - if (bindOp) { - operation = "bind"; - } - int found = -1; - if (bindOp) { - for (unsigned int i = 0; i < bindInfo.size(); i++) { + const char *operation = isBind ? "bind" : "unbind"; + int index = invalidIndex; + + if (isBind) { + for (auto i = 0u; i < bindInfo.size(); i++) { if (vmBindParams.handle == bindInfo[i].handle) { - found = i; + index = i; break; } } - } else { - auto gmmHelper = drm.getRootDeviceEnvironment().getGmmHelper(); - uint64_t ad = gmmHelper->decanonize(vmBindParams.start); - for (unsigned int i = 0; i < bindInfo.size(); i++) { - if (ad == bindInfo[i].addr) { - found = i; + } else // unbind + { + auto address = gmmHelper->decanonize(vmBindParams.start); + for (auto i = 0u; i < bindInfo.size(); i++) { + if (address == bindInfo[i].addr) { + index = i; break; } } } - if (found != -1) { - uint32_t extraBindFlag = 0; - struct drm_xe_sync sync[1] = {}; + + if (index != invalidIndex) { + + drm_xe_sync sync[1] = {}; sync[0].flags = DRM_XE_SYNC_USER_FENCE | DRM_XE_SYNC_SIGNAL; - extraBindFlag = XE_VM_BIND_FLAG_ASYNC; auto xeBindExtUserFence = reinterpret_cast(vmBindParams.extensions); UNRECOVERABLE_IF(!xeBindExtUserFence); UNRECOVERABLE_IF(xeBindExtUserFence->tag != UserFenceExtension::tagValue); sync[0].addr = xeBindExtUserFence->addr; sync[0].timeline_value = xeBindExtUserFence->value; - struct drm_xe_vm_bind bind = {}; + drm_xe_vm_bind bind = {}; bind.vm_id = vmBindParams.vmId; bind.num_binds = 1; - bind.bind.obj = vmBindParams.handle; - bind.bind.obj_offset = vmBindParams.offset; - bind.bind.range = vmBindParams.length; - - auto gmmHelper = drm.getRootDeviceEnvironment().getGmmHelper(); - - bind.bind.addr = 
gmmHelper->decanonize(vmBindParams.start); - bind.bind.op = XE_VM_BIND_OP_MAP; bind.num_syncs = 1; bind.syncs = reinterpret_cast(&sync); - if (vmBindParams.handle & XE_USERPTR_FAKE_FLAG) { - bind.bind.obj = 0; - bind.bind.obj_offset = bindInfo[found].userptr; - bind.bind.op = XE_VM_BIND_OP_MAP_USERPTR; - } - if (!bindOp) { + bind.bind.range = vmBindParams.length; + bind.bind.addr = gmmHelper->decanonize(vmBindParams.start); + bind.bind.flags = XE_VM_BIND_FLAG_ASYNC; + bind.bind.obj_offset = vmBindParams.offset; + + if (isBind) { + bind.bind.op = XE_VM_BIND_OP_MAP; + bind.bind.obj = vmBindParams.handle; + if (bindInfo[index].handle & XE_USERPTR_FAKE_FLAG) { + bind.bind.op = XE_VM_BIND_OP_MAP_USERPTR; + bind.bind.obj = 0; + bind.bind.obj_offset = bindInfo[index].userptr; + } + } else { bind.bind.op = XE_VM_BIND_OP_UNMAP; bind.bind.obj = 0; - if (bindInfo[found].handle & XE_USERPTR_FAKE_FLAG) { - bind.bind.obj_offset = bindInfo[found].userptr; + if (bindInfo[index].handle & XE_USERPTR_FAKE_FLAG) { + bind.bind.obj_offset = bindInfo[index].userptr; } } - bind.bind.op |= extraBindFlag; - bindInfo[found].addr = bind.bind.addr; - xeLog(" vm=%d obj=0x%x off=0x%llx range=0x%llx addr=0x%llx op=%d(%s) nsy=%d\n", + bindInfo[index].addr = bind.bind.addr; + + ret = IoctlHelper::ioctl(DrmIoctl::GemVmBind, &bind); + + xeLog(" vm=%d obj=0x%x off=0x%llx range=0x%llx addr=0x%llx operation=%d(%s) flags=%d(%s) nsy=%d ret=%d\n", bind.vm_id, bind.bind.obj, bind.bind.obj_offset, bind.bind.range, bind.bind.addr, bind.bind.op, - xeGetBindOpName(bind.bind.op), - bind.num_syncs); - ret = IoctlHelper::ioctl(DrmIoctl::GemVmBind, &bind); + xeGetBindOperationName(bind.bind.op), + bind.bind.flags, + xeGetBindFlagsName(bind.bind.flags), + bind.num_syncs, + ret); + if (ret != 0) { + xeLog("error: %s\n", operation); return ret; } @@ -1291,8 +1376,8 @@ int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool bindOp) { sync[0].timeline_value, XE_ONE_SEC); } - xeLog(" -> IoctlHelperXe::%s %s 
found=%d vmid=0x%x h=0x%x s=0x%llx o=0x%llx l=0x%llx f=0x%llx r=%d\n", - __FUNCTION__, operation, found, vmBindParams.vmId, + xeLog("error: -> IoctlHelperXe::%s %s index=%d vmid=0x%x h=0x%x s=0x%llx o=0x%llx l=0x%llx f=0x%llx r=%d\n", + __FUNCTION__, operation, index, vmBindParams.vmId, vmBindParams.handle, vmBindParams.start, vmBindParams.offset, vmBindParams.length, vmBindParams.flags, ret); @@ -1406,8 +1491,7 @@ std::string IoctlHelperXe::getFileForMaxMemoryFrequencyOfSubDevice(int subDevice return "/device/gt" + std::to_string(subDeviceId) + "/freq_rp0"; } -struct drm_xe_engine_class_instance * -IoctlHelperXe::xeFindMatchingEngine(uint16_t engineClass, uint16_t engineInstance) { +drm_xe_engine_class_instance *IoctlHelperXe::xeFindMatchingEngine(uint16_t engineClass, uint16_t engineInstance) { for (auto &engine : allEngines) { if (engine.engine_class == engineClass && (engineInstance == XE_FIND_INVALID_INSTANCE || engine.engine_instance == engineInstance)) { @@ -1443,4 +1527,4 @@ void IoctlHelperXe::fillBindInfoForIpcHandle(uint32_t handle, size_t size) { bool IoctlHelperXe::isImmediateVmBindRequired() const { return true; } -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h index 3c50e90fde..81ae4dcb7c 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h @@ -28,6 +28,8 @@ struct drm_xe_engine_class_instance; namespace NEO { +enum class EngineClass : uint16_t; + struct BindInfo { uint32_t handle; uint64_t userptr; @@ -107,6 +109,11 @@ class IoctlHelperXe : public IoctlHelper { std::unique_ptr createMemoryInfo() override; void getTopologyData(size_t nTiles, std::vector> *geomDss, std::vector> *computeDss, std::vector> *euDss, DrmQueryTopologyData &topologyData, bool &isComputeDssEmpty); void getTopologyMap(size_t nTiles, std::vector> *dssInfo, 
TopologyMap &topologyMap); + + bool setGpuCpuTimes(TimeStampData *pGpuCpuTime, OSTime *osTime) override; + void initializeGetGpuTimeFunction() override{}; + bool getTimestampFrequency(uint64_t &frequency); + void fillBindInfoForIpcHandle(uint32_t handle, size_t size) override; bool isImmediateVmBindRequired() const override; @@ -114,11 +121,13 @@ class IoctlHelperXe : public IoctlHelper { template void xeLog(XeLogArgs &&...args) const; int xeGetQuery(Query *data); - struct drm_xe_engine_class_instance *xeFindMatchingEngine(uint16_t engineClass, uint16_t engineInstance); + drm_xe_engine_class_instance *xeFindMatchingEngine(uint16_t engineClass, uint16_t engineInstance); protected: const char *xeGetClassName(int className); - const char *xeGetBindOpName(int bindOp); + const char *xeGetBindOperationName(int bindOperation); + const char *xeGetBindFlagsName(int bindFlags); + const char *xeGetengineClassName(uint32_t engineClass); template std::vector queryData(uint32_t queryId); @@ -134,6 +143,8 @@ class IoctlHelperXe : public IoctlHelper { uint64_t value; }; + void setDefaultEngine(); + protected: int chipsetId = 0; int revId = 0; @@ -149,6 +160,8 @@ class IoctlHelperXe : public IoctlHelper { std::vector hwconfigFakei915; std::vector contextParamEngine; std::vector allEngines; + + drm_xe_engine_class_instance *defaultEngine = nullptr; }; -} // namespace NEO +} // namespace NEO \ No newline at end of file diff --git a/shared/test/common/mocks/linux/mock_os_time_linux.h b/shared/test/common/mocks/linux/mock_os_time_linux.h index 51c3289985..9a99ed7c57 100644 --- a/shared/test/common/mocks/linux/mock_os_time_linux.h +++ b/shared/test/common/mocks/linux/mock_os_time_linux.h @@ -44,7 +44,6 @@ class MockOSTimeLinux : public OSTimeLinux { void updateDrm(Drm *drm) { osInterface->setDriverModel(std::unique_ptr(drm)); static_cast(this->deviceTime.get())->pDrm = drm; - static_cast(this->deviceTime.get())->timestampTypeDetect(); } static std::unique_ptr create(OSInterface 
&osInterface) { return std::unique_ptr(new MockOSTimeLinux(osInterface)); diff --git a/shared/test/unit_test/os_interface/linux/ioctl_helper_tests_prelim.cpp b/shared/test/unit_test/os_interface/linux/ioctl_helper_tests_prelim.cpp index e6c35d422a..330c1ff54f 100644 --- a/shared/test/unit_test/os_interface/linux/ioctl_helper_tests_prelim.cpp +++ b/shared/test/unit_test/os_interface/linux/ioctl_helper_tests_prelim.cpp @@ -14,6 +14,7 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/libult/linux/drm_mock.h" +#include "shared/test/common/mocks/linux/mock_os_time_linux.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/os_interface/linux/sys_calls_linux_ult.h" #include "shared/test/common/test_macros/hw_test.h" @@ -26,6 +27,12 @@ using namespace NEO; extern std::vector getRegionInfo(const std::vector &inputRegions); extern std::vector getEngineInfo(const std::vector &inputEngines); +namespace NEO { +bool getGpuTimeSplitted(Drm &drm, uint64_t *timestamp); +bool getGpuTime32(Drm &drm, uint64_t *timestamp); +bool getGpuTime36(Drm &drm, uint64_t *timestamp); +} // namespace NEO + struct IoctlPrelimHelperTests : ::testing::Test { MockExecutionEnvironment executionEnvironment{}; std::unique_ptr drm{Drm::create(std::make_unique(0, ""), *executionEnvironment.rootDeviceEnvironments[0])}; @@ -595,3 +602,90 @@ TEST_F(IoctlPrelimHelperTests, WhenSetupIpVersionIsCalledThenIpVersionIsCorrect) ioctlHelper.setupIpVersion(); EXPECT_EQ(config, hwInfo.ipVersion.value); } + +TEST_F(IoctlPrelimHelperTests, whenGettingGpuTimeThenSucceeds) { + MockExecutionEnvironment executionEnvironment{}; + auto drm = std::make_unique(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); + ASSERT_NE(nullptr, drm); + + IoctlHelperPrelim20 ioctlHelper{*drm}; + ASSERT_EQ(true, ioctlHelper.initialize()); + + uint64_t time = 0; + auto success = 
getGpuTime32(*drm.get(), &time); + EXPECT_TRUE(success); + EXPECT_NE(0ULL, time); + success = getGpuTime36(*drm.get(), &time); + EXPECT_TRUE(success); + EXPECT_NE(0ULL, time); + success = getGpuTimeSplitted(*drm.get(), &time); + EXPECT_TRUE(success); + EXPECT_NE(0ULL, time); +} + +TEST_F(IoctlPrelimHelperTests, givenInvalidDrmWhenGettingGpuTimeThenFails) { + MockExecutionEnvironment executionEnvironment{}; + auto drm = std::make_unique(*executionEnvironment.rootDeviceEnvironments[0]); + ASSERT_NE(nullptr, drm); + + IoctlHelperPrelim20 ioctlHelper{*drm}; + ASSERT_EQ(true, ioctlHelper.initialize()); + + uint64_t time = 0; + auto success = getGpuTime32(*drm.get(), &time); + EXPECT_FALSE(success); + success = getGpuTime36(*drm.get(), &time); + EXPECT_FALSE(success); + success = getGpuTimeSplitted(*drm.get(), &time); + EXPECT_FALSE(success); +} + +TEST_F(IoctlPrelimHelperTests, whenGettingTimeThenTimeIsCorrect) { + MockExecutionEnvironment executionEnvironment{}; + auto drm = std::make_unique(*executionEnvironment.rootDeviceEnvironments[0]); + ASSERT_NE(nullptr, drm); + + IoctlHelperPrelim20 ioctlHelper{*drm}; + ASSERT_EQ(true, ioctlHelper.initialize()); + + { + auto p = ioctlHelper.getGpuTime; + bool (*const *ptr)(Drm &, uint64_t *) = p.target(); + EXPECT_EQ(*ptr, &::NEO::getGpuTime36); + } + + { + drm->ioctlRes = -1; + ioctlHelper.initializeGetGpuTimeFunction(); + auto p = ioctlHelper.getGpuTime; + bool (*const *ptr)(Drm &, uint64_t *) = p.target(); + EXPECT_EQ(*ptr, &::NEO::getGpuTime32); + } + + DrmMockCustom::IoctlResExt ioctlToPass = {1, 0}; + { + drm->reset(); + drm->ioctlRes = -1; + drm->ioctlResExt = &ioctlToPass; // 2nd ioctl is successful + ioctlHelper.initializeGetGpuTimeFunction(); + auto p = ioctlHelper.getGpuTime; + bool (*const *ptr)(Drm &, uint64_t *) = p.target(); + EXPECT_EQ(*ptr, &::NEO::getGpuTimeSplitted); + drm->ioctlResExt = &drm->none; + } +} + +TEST_F(IoctlPrelimHelperTests, 
givenInitializeGetGpuTimeFunctionNotCalledWhenSetGpuCpuTimesIsCalledThenFalseIsReturned) { + MockExecutionEnvironment executionEnvironment{}; + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + rootDeviceEnvironment.osInterface = std::make_unique(); + rootDeviceEnvironment.osInterface->setDriverModel(std::make_unique(mockFd, rootDeviceEnvironment)); + auto drm = std::make_unique(rootDeviceEnvironment); + IoctlHelperPrelim20 ioctlHelper{*drm}; + + drm->ioctlRes = -1; + TimeStampData pGpuCpuTime{}; + std::unique_ptr osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); + auto ret = ioctlHelper.setGpuCpuTimes(&pGpuCpuTime, osTime.get()); + EXPECT_EQ(false, ret); +} diff --git a/shared/test/unit_test/os_interface/linux/ioctl_helper_tests_upstream.cpp b/shared/test/unit_test/os_interface/linux/ioctl_helper_tests_upstream.cpp index d5700722a8..44a80c7b83 100644 --- a/shared/test/unit_test/os_interface/linux/ioctl_helper_tests_upstream.cpp +++ b/shared/test/unit_test/os_interface/linux/ioctl_helper_tests_upstream.cpp @@ -12,19 +12,36 @@ #include "shared/source/os_interface/product_helper.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" +#include "shared/test/common/mocks/linux/mock_os_time_linux.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" #include "shared/test/unit_test/os_interface/linux/drm_mock_impl.h" using namespace NEO; +namespace NEO { +bool getGpuTimeSplitted(Drm &drm, uint64_t *timestamp); +bool getGpuTime32(Drm &drm, uint64_t *timestamp); +bool getGpuTime36(Drm &drm, uint64_t *timestamp); +} // namespace NEO + struct MockIoctlHelperUpstream : IoctlHelperUpstream { + using IoctlHelperUpstream::initializeGetGpuTimeFunction; using IoctlHelperUpstream::IoctlHelperUpstream; using IoctlHelperUpstream::isSetPatSupported; void detectExtSetPatSupport() override { 
detectExtSetPatSupportCallCount++; + size_t currentIoctlCallCount = ioctlCallCount; IoctlHelperUpstream::detectExtSetPatSupport(); + detectExtSetPatSupportIoctlCallCount += ioctlCallCount - currentIoctlCallCount; + } + + void initializeGetGpuTimeFunction() override { + initializeGetGpuTimeFunctionCallCount++; + size_t currentIoctlCallCount = ioctlCallCount; + IoctlHelperUpstream::initializeGetGpuTimeFunction(); + initializeGetGpuTimeFunctionIoctlCallCount += ioctlCallCount - currentIoctlCallCount; } int ioctl(DrmIoctl request, void *arg) override { @@ -51,6 +68,9 @@ struct MockIoctlHelperUpstream : IoctlHelperUpstream { } size_t detectExtSetPatSupportCallCount = 0; + size_t detectExtSetPatSupportIoctlCallCount = 0; + size_t initializeGetGpuTimeFunctionCallCount = 0; + size_t initializeGetGpuTimeFunctionIoctlCallCount = 0; size_t ioctlCallCount = 0; std::optional overrideGemCreateExtReturnValue{}; bool lastGemCreateContainedSetPat = false; @@ -63,31 +83,48 @@ TEST(IoctlHelperUpstreamTest, whenInitializeIsCalledThenDetectExtSetPatSupportFu auto executionEnvironment = std::make_unique(); auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); MockIoctlHelperUpstream mockIoctlHelper{*drm}; + EXPECT_EQ(0u, mockIoctlHelper.detectExtSetPatSupportCallCount); EXPECT_FALSE(mockIoctlHelper.lastGemCreateContainedSetPat); - EXPECT_EQ(0u, mockIoctlHelper.ioctlCallCount); + EXPECT_EQ(0u, mockIoctlHelper.detectExtSetPatSupportIoctlCallCount); mockIoctlHelper.overrideGemCreateExtReturnValue = 0; mockIoctlHelper.initialize(); EXPECT_EQ(1u, mockIoctlHelper.detectExtSetPatSupportCallCount); EXPECT_TRUE(mockIoctlHelper.lastGemCreateContainedSetPat); - EXPECT_EQ(2u, mockIoctlHelper.ioctlCallCount); // create and close + EXPECT_EQ(2u, mockIoctlHelper.detectExtSetPatSupportIoctlCallCount); // create and close EXPECT_TRUE(mockIoctlHelper.isSetPatSupported); mockIoctlHelper.overrideGemCreateExtReturnValue = -1; mockIoctlHelper.initialize(); EXPECT_EQ(2u, 
mockIoctlHelper.detectExtSetPatSupportCallCount); EXPECT_TRUE(mockIoctlHelper.lastGemCreateContainedSetPat); - EXPECT_EQ(3u, mockIoctlHelper.ioctlCallCount); // only create + EXPECT_EQ(3u, mockIoctlHelper.detectExtSetPatSupportIoctlCallCount); // only create EXPECT_FALSE(mockIoctlHelper.isSetPatSupported); DebugManager.flags.DisableGemCreateExtSetPat.set(true); mockIoctlHelper.initialize(); EXPECT_EQ(3u, mockIoctlHelper.detectExtSetPatSupportCallCount); - EXPECT_EQ(3u, mockIoctlHelper.ioctlCallCount); // no ioctl calls + EXPECT_EQ(3u, mockIoctlHelper.detectExtSetPatSupportIoctlCallCount); // no ioctl calls EXPECT_FALSE(mockIoctlHelper.isSetPatSupported); } +TEST(IoctlHelperUpstreamTest, whenInitializeIsCalledThenInitializeGetGpuTimeFunctiontFunctionIsCalled) { + DebugManagerStateRestore stateRestore; + + auto executionEnvironment = std::make_unique(); + auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); + MockIoctlHelperUpstream mockIoctlHelper{*drm}; + + EXPECT_EQ(0u, mockIoctlHelper.initializeGetGpuTimeFunctionCallCount); + EXPECT_EQ(0u, mockIoctlHelper.initializeGetGpuTimeFunctionIoctlCallCount); + + mockIoctlHelper.initialize(); + EXPECT_EQ(1u, mockIoctlHelper.initializeGetGpuTimeFunctionCallCount); + EXPECT_EQ(2u, mockIoctlHelper.initializeGetGpuTimeFunctionIoctlCallCount); + EXPECT_NE(nullptr, mockIoctlHelper.getGpuTime); +} + TEST(IoctlHelperUpstreamTest, whenGettingVmBindAvailabilityThenFalseIsReturned) { auto executionEnvironment = std::make_unique(); auto drm = std::make_unique(*executionEnvironment->rootDeviceEnvironments[0]); @@ -675,3 +712,90 @@ TEST(IoctlHelperTestsUpstream, WhenSetupIpVersionIsCalledThenIpVersionIsCorrect) ioctlHelper.setupIpVersion(); EXPECT_EQ(config, hwInfo.ipVersion.value); } + +TEST(IoctlHelperTestsUpstream, whenGettingGpuTimeThenSucceeds) { + MockExecutionEnvironment executionEnvironment{}; + auto drm = std::make_unique(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); + ASSERT_NE(nullptr, 
drm); + + IoctlHelperUpstream ioctlHelper{*drm}; + ASSERT_EQ(true, ioctlHelper.initialize()); + + uint64_t time = 0; + auto success = getGpuTime32(*drm.get(), &time); + EXPECT_TRUE(success); + EXPECT_NE(0ULL, time); + success = getGpuTime36(*drm.get(), &time); + EXPECT_TRUE(success); + EXPECT_NE(0ULL, time); + success = getGpuTimeSplitted(*drm.get(), &time); + EXPECT_TRUE(success); + EXPECT_NE(0ULL, time); +} + +TEST(IoctlHelperTestsUpstream, givenInvalidDrmWhenGettingGpuTimeThenFails) { + MockExecutionEnvironment executionEnvironment{}; + auto drm = std::make_unique(*executionEnvironment.rootDeviceEnvironments[0]); + ASSERT_NE(nullptr, drm); + + IoctlHelperUpstream ioctlHelper{*drm}; + ASSERT_EQ(true, ioctlHelper.initialize()); + + uint64_t time = 0; + auto success = getGpuTime32(*drm.get(), &time); + EXPECT_FALSE(success); + success = getGpuTime36(*drm.get(), &time); + EXPECT_FALSE(success); + success = getGpuTimeSplitted(*drm.get(), &time); + EXPECT_FALSE(success); +} + +TEST(IoctlHelperTestsUpstream, whenGettingTimeThenTimeIsCorrect) { + MockExecutionEnvironment executionEnvironment{}; + auto drm = std::make_unique(*executionEnvironment.rootDeviceEnvironments[0]); + ASSERT_NE(nullptr, drm); + + IoctlHelperUpstream ioctlHelper{*drm}; + ASSERT_EQ(true, ioctlHelper.initialize()); + + { + auto p = ioctlHelper.getGpuTime; + bool (*const *ptr)(Drm &, uint64_t *) = p.target(); + EXPECT_EQ(*ptr, &::NEO::getGpuTime36); + } + + { + drm->ioctlRes = -1; + ioctlHelper.initializeGetGpuTimeFunction(); + auto p = ioctlHelper.getGpuTime; + bool (*const *ptr)(Drm &, uint64_t *) = p.target(); + EXPECT_EQ(*ptr, &::NEO::getGpuTime32); + } + + DrmMockCustom::IoctlResExt ioctlToPass = {1, 0}; + { + drm->reset(); + drm->ioctlRes = -1; + drm->ioctlResExt = &ioctlToPass; // 2nd ioctl is successful + ioctlHelper.initializeGetGpuTimeFunction(); + auto p = ioctlHelper.getGpuTime; + bool (*const *ptr)(Drm &, uint64_t *) = p.target(); + EXPECT_EQ(*ptr, &::NEO::getGpuTimeSplitted); + 
drm->ioctlResExt = &drm->none; + } +} + +TEST(IoctlHelperTestsUpstream, givenInitializeGetGpuTimeFunctionNotCalledWhenSetGpuCpuTimesIsCalledThenFalseIsReturned) { + MockExecutionEnvironment executionEnvironment{}; + auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; + rootDeviceEnvironment.osInterface = std::make_unique(); + rootDeviceEnvironment.osInterface->setDriverModel(std::make_unique(mockFd, rootDeviceEnvironment)); + auto drm = std::make_unique(rootDeviceEnvironment); + IoctlHelperUpstream ioctlHelper{*drm}; + + drm->ioctlRes = -1; + TimeStampData pGpuCpuTime{}; + std::unique_ptr osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); + auto ret = ioctlHelper.setGpuCpuTimes(&pGpuCpuTime, osTime.get()); + EXPECT_EQ(false, ret); +} diff --git a/shared/test/unit_test/os_interface/linux/os_time_test.cpp b/shared/test/unit_test/os_interface/linux/os_time_test.cpp index 167e0ed842..6e0cfe35b4 100644 --- a/shared/test/unit_test/os_interface/linux/os_time_test.cpp +++ b/shared/test/unit_test/os_interface/linux/os_time_test.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/os_interface/linux/drm_neo.h" +#include "shared/source/os_interface/linux/ioctl_helper.h" #include "shared/source/os_interface/linux/os_time_linux.h" #include "shared/source/os_interface/os_interface.h" #include "shared/test/common/mocks/linux/mock_os_time_linux.h" @@ -71,33 +72,6 @@ TEST_F(DrmTimeTest, GivenFalseTimeFuncWhenGettingCpuTimeThenFails) { EXPECT_FALSE(error); } -TEST_F(DrmTimeTest, WhenGettingGpuTimeThenSuceeds) { - uint64_t time = 0; - auto pDrm = new DrmMockTime(mockFd, *executionEnvironment.rootDeviceEnvironments[0]); - osTime->updateDrm(pDrm); - auto error = osTime->getDeviceTime()->getGpuTime32(&time); - EXPECT_TRUE(error); - EXPECT_NE(0ULL, time); - error = osTime->getDeviceTime()->getGpuTime36(&time); - EXPECT_TRUE(error); - EXPECT_NE(0ULL, time); - error = osTime->getDeviceTime()->getGpuTimeSplitted(&time); - EXPECT_TRUE(error); - 
EXPECT_NE(0ULL, time); -} - -TEST_F(DrmTimeTest, GivenInvalidDrmWhenGettingGpuTimeThenFails) { - uint64_t time = 0; - auto pDrm = new DrmMockFail(*executionEnvironment.rootDeviceEnvironments[0]); - osTime->updateDrm(pDrm); - auto error = osTime->getDeviceTime()->getGpuTime32(&time); - EXPECT_FALSE(error); - error = osTime->getDeviceTime()->getGpuTime36(&time); - EXPECT_FALSE(error); - error = osTime->getDeviceTime()->getGpuTimeSplitted(&time); - EXPECT_FALSE(error); -} - TEST_F(DrmTimeTest, WhenGettingGpuCpuTimeThenSucceeds) { TimeStampData gpuCpuTime01 = {0, 0}; TimeStampData gpuCpuTime02 = {0, 0}; @@ -220,34 +194,6 @@ TEST_F(DrmTimeTest, GivenInvalidFuncTimeWhenGettingGpuCpuTimeCpuThenFails) { EXPECT_FALSE(error); } -TEST_F(DrmTimeTest, WhenGettingTimeThenTimeIsCorrect) { - auto drm = new DrmMockCustom(*executionEnvironment.rootDeviceEnvironments[0]); - osTime->updateDrm(drm); - - { - auto p = osTime->getDeviceTime()->getGpuTime; - EXPECT_EQ(p, &DeviceTimeDrm::getGpuTime36); - } - - { - drm->ioctlRes = -1; - osTime->getDeviceTime()->timestampTypeDetect(); - auto p = osTime->getDeviceTime()->getGpuTime; - EXPECT_EQ(p, &DeviceTimeDrm::getGpuTime32); - } - - DrmMockCustom::IoctlResExt ioctlToPass = {1, 0}; - { - drm->reset(); - drm->ioctlRes = -1; - drm->ioctlResExt = &ioctlToPass; // 2nd ioctl is successful - osTime->getDeviceTime()->timestampTypeDetect(); - auto p = osTime->getDeviceTime()->getGpuTime; - EXPECT_EQ(p, &DeviceTimeDrm::getGpuTimeSplitted); - drm->ioctlResExt = &drm->none; - } -} - TEST_F(DrmTimeTest, givenGpuTimestampResolutionQueryWhenIoctlFailsThenDefaultResolutionIsReturned) { auto defaultResolution = defaultHwInfo->capabilityTable.defaultProfilingTimerResolution; diff --git a/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp b/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp index a2b50e0bd1..f3b3dad0e0 100644 --- a/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp +++ 
b/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp @@ -17,6 +17,7 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/libult/linux/drm_mock.h" +#include "shared/test/common/mocks/linux/mock_os_time_linux.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/test.h" @@ -26,12 +27,14 @@ using namespace NEO; struct MockIoctlHelperXe : IoctlHelperXe { using IoctlHelperXe::bindInfo; + using IoctlHelperXe::defaultEngine; using IoctlHelperXe::IoctlHelperXe; - using IoctlHelperXe::xeGetBindOpName; + using IoctlHelperXe::setDefaultEngine; + using IoctlHelperXe::xeGetBindFlagsName; + using IoctlHelperXe::xeGetBindOperationName; using IoctlHelperXe::xeGetClassName; using IoctlHelperXe::xeGetengineClassName; using IoctlHelperXe::xeShowBindTable; - using IoctlHelperXe::xeTimestampFrequency; }; TEST(IoctlHelperXeTest, givenXeDrmVersionsWhenGettingIoctlHelperThenValidIoctlHelperIsReturned) { @@ -145,31 +148,28 @@ class DrmMockXe : public DrmMockCustom { MemoryConstants::gigaByte // used size }; - auto xeQueryGts = reinterpret_cast(queryGts.begin()); - xeQueryGts->num_gt = 3; - xeQueryGts->gts[0] = { + auto xeQueryGtList = reinterpret_cast(queryGtList.begin()); + xeQueryGtList->num_gt = 3; + xeQueryGtList->gt_list[0] = { XE_QUERY_GT_TYPE_MAIN, // type - 0, // instance - 12500000, // clock freq - 0, // features + 0, // gt_id + 12500000, // clock_freq 0b100, // native mem regions 0x011, // slow mem regions 0 // inaccessible mem regions }; - xeQueryGts->gts[1] = { + xeQueryGtList->gt_list[1] = { XE_QUERY_GT_TYPE_MEDIA, // type - 1, // instance + 1, // gt_id 12500000, // clock freq - 0, // features 0b001, // native mem regions 0x110, // slow mem regions 0 // inaccessible mem regions }; - xeQueryGts->gts[2] = { + xeQueryGtList->gt_list[2] = { XE_QUERY_GT_TYPE_MAIN, // type - 0, // instance + 0, // gt_id 
12500000, // clock freq - 0, // features 0b010, // native mem regions 0x101, // slow mem regions 0 // inaccessible mem regions @@ -187,11 +187,6 @@ class DrmMockXe : public DrmMockCustom { return setIoctlAnswer; } switch (request) { - case DrmIoctl::RegRead: { - struct drm_xe_mmio *reg = static_cast(arg); - reg->value = reg->addr; - ret = 0; - } break; case DrmIoctl::GemVmCreate: { struct drm_xe_vm_create *v = static_cast(arg); v->vm_id = testValueVmId; @@ -257,11 +252,11 @@ class DrmMockXe : public DrmMockCustom { } deviceQuery->size = sizeof(queryMemUsage); break; - case DRM_XE_DEVICE_QUERY_GTS: + case DRM_XE_DEVICE_QUERY_GT_LIST: if (deviceQuery->data) { - memcpy_s(reinterpret_cast(deviceQuery->data), deviceQuery->size, queryGts.begin(), sizeof(queryGts)); + memcpy_s(reinterpret_cast(deviceQuery->data), deviceQuery->size, queryGtList.begin(), sizeof(queryGtList)); } - deviceQuery->size = sizeof(queryGts); + deviceQuery->size = sizeof(queryGtList); break; case DRM_XE_DEVICE_QUERY_GT_TOPOLOGY: if (deviceQuery->data) { @@ -269,6 +264,12 @@ class DrmMockXe : public DrmMockCustom { } deviceQuery->size = static_cast(queryTopology.size()); break; + case DRM_XE_DEVICE_QUERY_ENGINE_CYCLES: + if (deviceQuery->data) { + memcpy_s(reinterpret_cast(deviceQuery->data), deviceQuery->size, queryEngineCycles, sizeof(queryEngineCycles)); + } + deviceQuery->size = sizeof(queryEngineCycles); + break; }; ret = 0; } break; @@ -335,9 +336,11 @@ class DrmMockXe : public DrmMockCustom { static_assert(sizeof(drm_xe_query_mem_region) == 12 * sizeof(uint64_t), ""); uint64_t queryMemUsage[37]{}; // 1 qword for num regions and 12 qwords per region - static_assert(sizeof(drm_xe_query_gts::drm_xe_query_gt) == 13 * sizeof(uint64_t), ""); - StackVec queryGts{}; // 1 qword for num gts and 13 qwords per gt - std::vector queryTopology; + static_assert(sizeof(drm_xe_query_gt) == 12 * sizeof(uint64_t), ""); + StackVec queryGtList{}; // 1 qword for num gts and 12 qwords per gt + alignas(64) std::vector 
queryTopology; + static_assert(sizeof(drm_xe_query_engine_cycles) == 6 * sizeof(uint64_t), ""); + uint64_t queryEngineCycles[6]{}; // 1 qword for eci and 5 qwords StackVec waitUserFenceInputs; StackVec vmBindInputs; StackVec syncInputs; @@ -644,7 +647,6 @@ TEST(IoctlHelperXeTest, givenIoctlHelperXeWhenCallingAnyMethodThenDummyValueIsRe verifyIoctlString(DrmIoctl::GemWaitUserFence, "DRM_IOCTL_XE_WAIT_USER_FENCE"); verifyIoctlString(DrmIoctl::PrimeFdToHandle, "DRM_IOCTL_PRIME_FD_TO_HANDLE"); verifyIoctlString(DrmIoctl::PrimeHandleToFd, "DRM_IOCTL_PRIME_HANDLE_TO_FD"); - verifyIoctlString(DrmIoctl::RegRead, "DRM_IOCTL_XE_MMIO"); EXPECT_TRUE(xeIoctlHelper->completionFenceExtensionSupported(true)); @@ -685,7 +687,6 @@ TEST(IoctlHelperXeTest, whenGettingIoctlRequestValueThenPropertValueIsReturned) verifyIoctlRequestValue(DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, DrmIoctl::GemContextDestroy); verifyIoctlRequestValue(DRM_IOCTL_PRIME_FD_TO_HANDLE, DrmIoctl::PrimeFdToHandle); verifyIoctlRequestValue(DRM_IOCTL_PRIME_HANDLE_TO_FD, DrmIoctl::PrimeHandleToFd); - verifyIoctlRequestValue(DRM_IOCTL_XE_MMIO, DrmIoctl::RegRead); EXPECT_THROW(xeIoctlHelper->getIoctlRequestValue(DrmIoctl::DebuggerOpen), std::runtime_error); } @@ -701,8 +702,12 @@ TEST(IoctlHelperXeTest, verifyPublicFunctions) { EXPECT_STREQ(name, mockXeIoctlHelper->xeGetClassName(xeClass)); }; - auto verifyXeOpBindName = [&mockXeIoctlHelper](const char *name, auto bind) { - EXPECT_STREQ(name, mockXeIoctlHelper->xeGetBindOpName(bind)); + auto verifyXeOperationBindName = [&mockXeIoctlHelper](const char *name, auto bind) { + EXPECT_STREQ(name, mockXeIoctlHelper->xeGetBindOperationName(bind)); + }; + + auto verifyXeFlagsBindName = [&mockXeIoctlHelper](const char *name, auto flags) { + EXPECT_STREQ(name, mockXeIoctlHelper->xeGetBindFlagsName(flags)); }; auto verifyXeEngineClassName = [&mockXeIoctlHelper](const char *name, auto engineClass) { @@ -715,12 +720,18 @@ TEST(IoctlHelperXeTest, verifyPublicFunctions) { 
verifyXeClassName("vecs", DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE); verifyXeClassName("ccs", DRM_XE_ENGINE_CLASS_COMPUTE); - verifyXeOpBindName("MAP", XE_VM_BIND_OP_MAP); - verifyXeOpBindName("UNMAP", XE_VM_BIND_OP_UNMAP); - verifyXeOpBindName("MAP_USERPTR", XE_VM_BIND_OP_MAP_USERPTR); - verifyXeOpBindName("AS_MAP", XE_VM_BIND_OP_MAP | XE_VM_BIND_FLAG_ASYNC); - verifyXeOpBindName("AS_MAP_USERPTR", XE_VM_BIND_OP_MAP_USERPTR | XE_VM_BIND_FLAG_ASYNC); - verifyXeOpBindName("unknown_OP", -1); + verifyXeOperationBindName("MAP", XE_VM_BIND_OP_MAP); + verifyXeOperationBindName("UNMAP", XE_VM_BIND_OP_UNMAP); + verifyXeOperationBindName("MAP_USERPTR", XE_VM_BIND_OP_MAP_USERPTR); + verifyXeOperationBindName("UNMAP ALL", XE_VM_BIND_OP_UNMAP_ALL); + verifyXeOperationBindName("PREFETCH", XE_VM_BIND_OP_PREFETCH); + verifyXeOperationBindName("Unknown operation", -1); + + verifyXeFlagsBindName("READ_ONLY", XE_VM_BIND_FLAG_READONLY); + verifyXeFlagsBindName("ASYNC", XE_VM_BIND_FLAG_ASYNC); + verifyXeFlagsBindName("IMMEDIATE", XE_VM_BIND_FLAG_IMMEDIATE); + verifyXeFlagsBindName("NULL", XE_VM_BIND_FLAG_NULL); + verifyXeFlagsBindName("Unknown flag", -1); verifyXeEngineClassName("DRM_XE_ENGINE_CLASS_RENDER", DRM_XE_ENGINE_CLASS_RENDER); verifyXeEngineClassName("DRM_XE_ENGINE_CLASS_COPY", DRM_XE_ENGINE_CLASS_COPY); @@ -767,8 +778,7 @@ TEST(IoctlHelperXeTest, whenCallingIoctlThenProperValueIsReturned) { DebugManagerStateRestore restorer; auto executionEnvironment = std::make_unique(); DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; - auto xeIoctlHelper = std::make_unique(drm); - auto mockXeIoctlHelper = static_cast(xeIoctlHelper.get()); + auto mockXeIoctlHelper = std::make_unique(drm); drm.reset(); { @@ -787,17 +797,10 @@ TEST(IoctlHelperXeTest, whenCallingIoctlThenProperValueIsReturned) { ret = mockXeIoctlHelper->ioctl(DrmIoctl::GemClose, &cl); EXPECT_EQ(0, ret); } - { - RegisterRead test = {}; - test.offset = REG_GLOBAL_TIMESTAMP_LDW; - ret = 
mockXeIoctlHelper->ioctl(DrmIoctl::RegRead, &test); - EXPECT_EQ(0, ret); - EXPECT_EQ(test.offset, test.value); - } { GemVmControl test = {}; drm.pageFaultSupported = false; - uint32_t expectedVmCreateFlags = DRM_XE_VM_CREATE_ASYNC_BIND_OPS | + uint32_t expectedVmCreateFlags = DRM_XE_VM_CREATE_ASYNC_DEFAULT | DRM_XE_VM_CREATE_COMPUTE_MODE; ret = mockXeIoctlHelper->ioctl(DrmIoctl::GemVmCreate, &test); EXPECT_EQ(0, ret); @@ -805,7 +808,7 @@ TEST(IoctlHelperXeTest, whenCallingIoctlThenProperValueIsReturned) { EXPECT_EQ(test.flags, expectedVmCreateFlags); drm.pageFaultSupported = true; - expectedVmCreateFlags = DRM_XE_VM_CREATE_ASYNC_BIND_OPS | + expectedVmCreateFlags = DRM_XE_VM_CREATE_ASYNC_DEFAULT | DRM_XE_VM_CREATE_COMPUTE_MODE | DRM_XE_VM_CREATE_FAULT_MODE; ret = mockXeIoctlHelper->ioctl(DrmIoctl::GemVmCreate, &test); @@ -898,6 +901,8 @@ TEST(IoctlHelperXeTest, whenCallingIoctlThenProperValueIsReturned) { ret = mockXeIoctlHelper->ioctl(DrmIoctl::GemClose, &test); EXPECT_EQ(0, ret); } + auto engineInfo = mockXeIoctlHelper->createEngineInfo(false); + EXPECT_NE(nullptr, engineInfo); { GetParam test = {}; int dstvalue; @@ -1037,13 +1042,12 @@ TEST(IoctlHelperXeTest, givenOnlyMediaTypeWhenGetTopologyDataAndMapThenSubsliceI auto executionEnvironment = std::make_unique(); DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; - auto xeQueryGts = reinterpret_cast(drm.queryGts.begin()); - xeQueryGts->num_gt = 1; - xeQueryGts->gts[0] = { + auto xeQueryGtList = reinterpret_cast(drm.queryGtList.begin()); + xeQueryGtList->num_gt = 1; + xeQueryGtList->gt_list[0] = { XE_QUERY_GT_TYPE_MEDIA, // type - 0, // instance + 0, // gt_id 12500000, // clock freq - 0, // features 0b100, // native mem regions 0x011, // slow mem regions 0 // inaccessible mem regions @@ -1083,41 +1087,37 @@ TEST(IoctlHelperXeTest, givenMainAndMediaTypesWhenGetTopologyDataAndMapThenResul auto executionEnvironment = std::make_unique(); DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; - 
drm.queryGts.resize(53); - auto xeQueryGts = reinterpret_cast(drm.queryGts.begin()); - xeQueryGts->num_gt = 4; - xeQueryGts->gts[0] = { + drm.queryGtList.resize(49); + auto xeQueryGtList = reinterpret_cast(drm.queryGtList.begin()); + xeQueryGtList->num_gt = 4; + xeQueryGtList->gt_list[0] = { XE_QUERY_GT_TYPE_MAIN, // type - 0, // instance + 0, // gt_id 12500000, // clock freq - 0, // features 0b100, // native mem regions 0x011, // slow mem regions 0 // inaccessible mem regions }; - xeQueryGts->gts[1] = { + xeQueryGtList->gt_list[1] = { XE_QUERY_GT_TYPE_MEDIA, // type - 0, // instance + 0, // gt_id 12500000, // clock freq - 0, // features 0b100, // native mem regions 0x011, // slow mem regions 0 // inaccessible mem regions }; - xeQueryGts->gts[2] = { + xeQueryGtList->gt_list[2] = { XE_QUERY_GT_TYPE_MAIN, // type - 0, // instance + 0, // gt_id 12500000, // clock freq - 0, // features 0b010, // native mem regions 0x101, // slow mem regions 0 // inaccessible mem regions }; - xeQueryGts->gts[3] = { + xeQueryGtList->gt_list[3] = { XE_QUERY_GT_TYPE_MEDIA, // type - 0, // instance + 0, // gt_id 12500000, // clock freq - 0, // features 0b001, // native mem regions 0x100, // slow mem regions 0 // inaccessible mem regions @@ -1182,23 +1182,21 @@ struct DrmMockXe2T : public DrmMockXe { 4 * MemoryConstants::gigaByte, // total size MemoryConstants::gigaByte // used size }; - queryGts.resize(27); - auto xeQueryGts = reinterpret_cast(queryGts.begin()); - xeQueryGts->num_gt = 2; - xeQueryGts->gts[0] = { + queryGtList.resize(25); + auto xeQueryGtList = reinterpret_cast(queryGtList.begin()); + xeQueryGtList->num_gt = 2; + xeQueryGtList->gt_list[0] = { XE_QUERY_GT_TYPE_MAIN, // type - 0, // instance + 0, // gt_id 12500000, // clock freq - 0, // features 0b100, // native mem regions 0x011, // slow mem regions 0 // inaccessible mem regions }; - xeQueryGts->gts[1] = { + xeQueryGtList->gt_list[1] = { XE_QUERY_GT_TYPE_MAIN, // type - 0, // instance + 0, // gt_id 12500000, // clock freq - 
0, // features 0b010, // native mem regions 0x101, // slow mem regions 0 // inaccessible mem regions @@ -1559,8 +1557,6 @@ TEST(IoctlHelperXeTest, whenCreatingMemoryInfoThenProperMemoryBanksAreDiscovered EXPECT_EQ(1u, memoryRegions[2].region.memoryInstance); EXPECT_EQ(2 * MemoryConstants::gigaByte, memoryRegions[2].probedSize); EXPECT_EQ(2 * MemoryConstants::gigaByte - MemoryConstants::megaByte, memoryRegions[2].unallocatedSize); - - EXPECT_EQ(12500000u, xeIoctlHelper->xeTimestampFrequency); } TEST(IoctlHelperXeTest, givenIoctlFailureWhenCreatingMemoryInfoThenNoMemoryBanksAreDiscovered) { @@ -1819,3 +1815,109 @@ TEST(IoctlHelperXeTest, whenFillBindInfoForIpcHandleIsCalledThenBindInfoIsCorrec EXPECT_EQ(bindInfo.handle, handle); EXPECT_EQ(bindInfo.size, size); } + +TEST(IoctlHelperXeTest, givenIoctlFailureWhenGetTimestampFrequencyIsCalledThenFalseIsReturned) { + DebugManagerStateRestore restorer; + auto executionEnvironment = std::make_unique(); + DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; + auto xeIoctlHelper = std::make_unique(drm); + auto engineInfo = xeIoctlHelper->createEngineInfo(false); + ASSERT_NE(nullptr, engineInfo); + + drm.testMode(1, -1); + uint64_t frequency; + auto ret = xeIoctlHelper->getTimestampFrequency(frequency); + EXPECT_EQ(false, ret); +} + +TEST(IoctlHelperXeTest, whenGetTimestampFrequencyIsCalledThenProperFrequencyIsSet) { + DebugManagerStateRestore restorer; + auto executionEnvironment = std::make_unique(); + DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; + auto xeIoctlHelper = std::make_unique(drm); + auto engineInfo = xeIoctlHelper->createEngineInfo(false); + ASSERT_NE(nullptr, engineInfo); + + uint64_t expectedFrequency = 100; + auto xeQueryEngineCycles = reinterpret_cast(drm.queryEngineCycles); + xeQueryEngineCycles->engine_frequency = expectedFrequency; + + uint64_t frequency = 0; + auto ret = xeIoctlHelper->getTimestampFrequency(frequency); + EXPECT_EQ(true, ret); + EXPECT_EQ(expectedFrequency, 
frequency); +} + +TEST(IoctlHelperXeTest, givenIoctlFailureWhenSetGpuCpuTimesIsCalledThenFalseIsReturned) { + DebugManagerStateRestore restorer; + auto executionEnvironment = std::make_unique(); + auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0]; + rootDeviceEnvironment.osInterface = std::make_unique(); + rootDeviceEnvironment.osInterface->setDriverModel(std::make_unique(mockFd, rootDeviceEnvironment)); + DrmMockXe drm{rootDeviceEnvironment}; + auto xeIoctlHelper = std::make_unique(drm); + auto engineInfo = xeIoctlHelper->createEngineInfo(false); + ASSERT_NE(nullptr, engineInfo); + + drm.testMode(1, -1); + TimeStampData pGpuCpuTime{}; + std::unique_ptr osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); + auto ret = xeIoctlHelper->setGpuCpuTimes(&pGpuCpuTime, osTime.get()); + EXPECT_EQ(false, ret); +} + +TEST(IoctlHelperXeTest, givenIoctlFailureWhenSetGpuCpuTimesIsCalledThenProperValuesAreSet) { + DebugManagerStateRestore restorer; + auto executionEnvironment = std::make_unique(); + auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0]; + rootDeviceEnvironment.osInterface = std::make_unique(); + rootDeviceEnvironment.osInterface->setDriverModel(std::make_unique(mockFd, rootDeviceEnvironment)); + DrmMockXe drm{rootDeviceEnvironment}; + auto xeIoctlHelper = std::make_unique(drm); + auto engineInfo = xeIoctlHelper->createEngineInfo(false); + ASSERT_NE(nullptr, engineInfo); + + uint64_t expectedCycles = 100000; + uint64_t expectedTimestamp = 100; + auto xeQueryEngineCycles = reinterpret_cast(drm.queryEngineCycles); + xeQueryEngineCycles->width = 32; + xeQueryEngineCycles->engine_cycles = expectedCycles; + xeQueryEngineCycles->cpu_timestamp = expectedTimestamp; + + TimeStampData pGpuCpuTime{}; + std::unique_ptr osTime = MockOSTimeLinux::create(*rootDeviceEnvironment.osInterface); + auto ret = xeIoctlHelper->setGpuCpuTimes(&pGpuCpuTime, osTime.get()); + EXPECT_EQ(true, ret); + 
EXPECT_EQ(pGpuCpuTime.gpuTimeStamp, expectedCycles); + EXPECT_EQ(pGpuCpuTime.cpuTimeinNS, expectedTimestamp); +} + +TEST(IoctlHelperXeTest, whenSetDefaultEngineIsCalledThenProperEngineIsSet) { + NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); + auto executionEnvironment = std::make_unique(&hwInfo); + DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; + auto xeIoctlHelper = std::make_unique(drm); + + auto engineInfo = xeIoctlHelper->createEngineInfo(true); + ASSERT_NE(nullptr, engineInfo); + + xeIoctlHelper->setDefaultEngine(); + EXPECT_EQ(DRM_XE_ENGINE_CLASS_COMPUTE, xeIoctlHelper->defaultEngine->engine_class); +} + +TEST(IoctlHelperXeTest, givenNoEnginesWhenSetDefaultEngineIsCalledThenAbortIsThrown) { + auto executionEnvironment = std::make_unique(); + DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; + auto xeIoctlHelper = std::make_unique(drm); + + EXPECT_THROW(xeIoctlHelper->setDefaultEngine(), std::exception); +} + +TEST(IoctlHelperXeTest, givenXeIoctlHelperWhenInitializeGetGpuTimeFunctionIsCalledGetGpuFunctionIsNotSet) { + auto executionEnvironment = std::make_unique(); + DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]}; + auto xeIoctlHelper = std::make_unique(drm); + + xeIoctlHelper->initializeGetGpuTimeFunction(); + EXPECT_EQ(xeIoctlHelper->getGpuTime, nullptr); +} \ No newline at end of file diff --git a/third_party/uapi/drm/xe_drm.h b/third_party/uapi/drm/xe_drm.h index 804c02270d..9761d62d81 100644 --- a/third_party/uapi/drm/xe_drm.h +++ b/third_party/uapi/drm/xe_drm.h @@ -106,11 +106,10 @@ struct xe_user_extension { #define DRM_XE_EXEC_QUEUE_CREATE 0x06 #define DRM_XE_EXEC_QUEUE_DESTROY 0x07 #define DRM_XE_EXEC 0x08 -#define DRM_XE_MMIO 0x09 -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0a -#define DRM_XE_WAIT_USER_FENCE 0x0b -#define DRM_XE_VM_MADVISE 0x0c -#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0d +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x09 +#define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_VM_MADVISE 
0x0b +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0c /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -123,11 +122,30 @@ struct xe_user_extension { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct drm_xe_exec_queue_destroy) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) -#define DRM_IOCTL_XE_MMIO DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_MMIO, struct drm_xe_mmio) #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) #define DRM_IOCTL_XE_VM_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise) +/** struct drm_xe_engine_class_instance - instance of an engine class */ +struct drm_xe_engine_class_instance { +#define DRM_XE_ENGINE_CLASS_RENDER 0 +#define DRM_XE_ENGINE_CLASS_COPY 1 +#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2 +#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 +#define DRM_XE_ENGINE_CLASS_COMPUTE 4 + /* + * Kernel only classes (not actual hardware engine class). Used for + * creating ordered queues of VM bind operations. + */ +#define DRM_XE_ENGINE_CLASS_VM_BIND_ASYNC 5 +#define DRM_XE_ENGINE_CLASS_VM_BIND_SYNC 6 + __u16 engine_class; + + __u16 engine_instance; + __u16 gt_id; + __u16 rsvd; +}; + /** * enum drm_xe_memory_class - Supported memory classes. 
*/ @@ -219,6 +237,60 @@ struct drm_xe_query_mem_region { __u64 reserved[6]; }; +/** + * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps + * + * If a query is made with a struct drm_xe_device_query where .query is equal to + * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles + * in .data. struct drm_xe_query_engine_cycles is allocated by the user and + * .data points to this allocated structure. + * + * The query returns the engine cycles and the frequency that can + * be used to calculate the engine timestamp. In addition the + * query returns a set of cpu timestamps that indicate when the command + * streamer cycle count was captured. + */ +struct drm_xe_query_engine_cycles { + /** + * @eci: This is input by the user and is the engine for which command + * streamer cycles is queried. + */ + struct drm_xe_engine_class_instance eci; + + /** + * @clockid: This is input by the user and is the reference clock id for + * CPU timestamp. For definition, see clock_gettime(2) and + * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC, + * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI. + */ + __s32 clockid; + + /** @width: Width of the engine cycle counter in bits. */ + __u32 width; + + /** + * @engine_cycles: Engine cycles as read from its register + * at 0x358 offset. + */ + __u64 engine_cycles; + + /** @engine_frequency: Frequency of the engine cycles in Hz. */ + __u64 engine_frequency; + + /** + * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before + * reading the engine_cycles register using the reference clockid set by the + * user. + */ + __u64 cpu_timestamp; + + /** + * @cpu_delta: Time delta in ns captured around reading the lower dword + * of the engine_cycles register. 
+ */ + __u64 cpu_delta; +}; + /** * struct drm_xe_query_mem_usage - describe memory regions and usage * @@ -256,46 +328,65 @@ struct drm_xe_query_config { #define XE_QUERY_CONFIG_VA_BITS 3 #define XE_QUERY_CONFIG_GT_COUNT 4 #define XE_QUERY_CONFIG_MEM_REGION_COUNT 5 -#define XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY 6 -#define XE_QUERY_CONFIG_NUM_PARAM (XE_QUERY_CONFIG_MAX_ENGINE_PRIORITY + 1) +#define XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY 6 +#define XE_QUERY_CONFIG_NUM_PARAM (XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1) /** @info: array of elements containing the config info */ __u64 info[]; }; /** - * struct drm_xe_query_gts - describe GTs + * struct drm_xe_query_gt - describe an individual GT. * - * If a query is made with a struct drm_xe_device_query where .query - * is equal to DRM_XE_DEVICE_QUERY_GTS, then the reply uses struct - * drm_xe_query_gts in .data. + * To be used with drm_xe_query_gt_list, which will return a list with all the + * existing GT individual descriptions. + * Graphics Technology (GT) is a subset of a GPU/tile that is responsible for + * implementing graphics and/or media operations. */ -struct drm_xe_query_gts { - /** @num_gt: number of GTs returned in gts */ - __u32 num_gt; - - /** @pad: MBZ */ - __u32 pad; - - /** - * @gts: The GTs returned for this device - * - * TODO: convert drm_xe_query_gt to proper kernel-doc. - * TODO: Perhaps info about every mem region relative to this GT? e.g. - * bandwidth between this GT and remote region? 
- */ - struct drm_xe_query_gt { +struct drm_xe_query_gt { #define XE_QUERY_GT_TYPE_MAIN 0 #define XE_QUERY_GT_TYPE_REMOTE 1 #define XE_QUERY_GT_TYPE_MEDIA 2 - __u16 type; - __u16 instance; - __u32 clock_freq; - __u64 features; - __u64 native_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ - __u64 slow_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ - __u64 inaccessible_mem_regions; /* bit mask of instances from drm_xe_query_mem_usage */ - __u64 reserved[8]; - } gts[]; + /** @type: GT type: Main, Remote, or Media */ + __u16 type; + /** @gt_id: Unique ID of this GT within the PCI Device */ + __u16 gt_id; + /** @clock_freq: A clock frequency for timestamp */ + __u32 clock_freq; + /** + * @native_mem_regions: Bit mask of instances from + * drm_xe_query_mem_usage that lives on the same GPU/Tile and have + * direct access. + */ + __u64 native_mem_regions; + /** + * @slow_mem_regions: Bit mask of instances from + * drm_xe_query_mem_usage that this GT can indirectly access, although + * they live on a different GPU/Tile. + */ + __u64 slow_mem_regions; + /** + * @inaccessible_mem_regions: Bit mask of instances from + * drm_xe_query_mem_usage that is not accessible by this GT at all. + */ + __u64 inaccessible_mem_regions; + /** @reserved: Reserved */ + __u64 reserved[8]; +}; + +/** + * struct drm_xe_query_gt_list - A list with GT description items. + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_GT_LIST, then the reply uses struct + * drm_xe_query_gt_list in .data. 
+ */ +struct drm_xe_query_gt_list { + /** @num_gt: number of GT items returned in gt_list */ + __u32 num_gt; + /** @pad: MBZ */ + __u32 pad; + /** @gt_list: The GT list returned for this device */ + struct drm_xe_query_gt gt_list[]; }; /** @@ -385,12 +476,13 @@ struct drm_xe_device_query { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; -#define DRM_XE_DEVICE_QUERY_ENGINES 0 -#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 -#define DRM_XE_DEVICE_QUERY_CONFIG 2 -#define DRM_XE_DEVICE_QUERY_GTS 3 -#define DRM_XE_DEVICE_QUERY_HWCONFIG 4 -#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 +#define DRM_XE_DEVICE_QUERY_ENGINES 0 +#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1 +#define DRM_XE_DEVICE_QUERY_CONFIG 2 +#define DRM_XE_DEVICE_QUERY_GT_LIST 3 +#define DRM_XE_DEVICE_QUERY_HWCONFIG 4 +#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 +#define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 /** @query: The type of data to query */ __u32 query; @@ -480,29 +572,11 @@ struct drm_xe_gem_mmap_offset { __u64 reserved[2]; }; -/** - * struct drm_xe_vm_bind_op_error_capture - format of VM bind op error capture - */ -struct drm_xe_vm_bind_op_error_capture { - /** @error: errno that occurred */ - __s32 error; - - /** @op: operation that encounter an error */ - __u32 op; - - /** @addr: address of bind op */ - __u64 addr; - - /** @size: size of bind */ - __u64 size; -}; - -/** struct drm_xe_ext_vm_set_property - VM set property extension */ -struct drm_xe_ext_vm_set_property { +/** struct drm_xe_ext_set_property - XE set property extension */ +struct drm_xe_ext_set_property { /** @base: base user extension */ struct xe_user_extension base; -#define XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS 0 /** @property: property to set */ __u32 property; @@ -523,7 +597,7 @@ struct drm_xe_vm_create { #define DRM_XE_VM_CREATE_SCRATCH_PAGE (0x1 << 0) #define DRM_XE_VM_CREATE_COMPUTE_MODE (0x1 << 1) -#define DRM_XE_VM_CREATE_ASYNC_BIND_OPS (0x1 << 2) +#define DRM_XE_VM_CREATE_ASYNC_DEFAULT (0x1 << 2) 
#define DRM_XE_VM_CREATE_FAULT_MODE (0x1 << 3) /** @flags: Flags */ __u32 flags; @@ -583,41 +657,18 @@ struct drm_xe_vm_bind_op { #define XE_VM_BIND_OP_MAP 0x0 #define XE_VM_BIND_OP_UNMAP 0x1 #define XE_VM_BIND_OP_MAP_USERPTR 0x2 -#define XE_VM_BIND_OP_RESTART 0x3 -#define XE_VM_BIND_OP_UNMAP_ALL 0x4 -#define XE_VM_BIND_OP_PREFETCH 0x5 +#define XE_VM_BIND_OP_UNMAP_ALL 0x3 +#define XE_VM_BIND_OP_PREFETCH 0x4 + /** @op: Bind operation to perform */ + __u32 op; -#define XE_VM_BIND_FLAG_READONLY (0x1 << 16) - /* - * A bind ops completions are always async, hence the support for out - * sync. This flag indicates the allocation of the memory for new page - * tables and the job to program the pages tables is asynchronous - * relative to the IOCTL. That part of a bind operation can fail under - * memory pressure, the job in practice can't fail unless the system is - * totally shot. - * - * If this flag is clear and the IOCTL doesn't return an error, in - * practice the bind op is good and will complete. - * - * If this flag is set and doesn't return an error, the bind op can - * still fail and recovery is needed. If configured, the bind op that - * caused the error will be captured in drm_xe_vm_bind_op_error_capture. - * Once the user sees the error (via a ufence + - * XE_VM_PROPERTY_BIND_OP_ERROR_CAPTURE_ADDRESS), it should free memory - * via non-async unbinds, and then restart all queued async binds op via - * XE_VM_BIND_OP_RESTART. Or alternatively the user should destroy the - * VM. - * - * This flag is only allowed when DRM_XE_VM_CREATE_ASYNC_BIND_OPS is - * configured in the VM and must be set if the VM is configured with - * DRM_XE_VM_CREATE_ASYNC_BIND_OPS and not in an error state. - */ -#define XE_VM_BIND_FLAG_ASYNC (0x1 << 17) +#define XE_VM_BIND_FLAG_READONLY (0x1 << 0) +#define XE_VM_BIND_FLAG_ASYNC (0x1 << 1) /* * Valid on a faulting VM only, do the MAP operation immediately rather * than deferring the MAP to the page fault handler. 
*/ -#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 18) +#define XE_VM_BIND_FLAG_IMMEDIATE (0x1 << 2) /* * When the NULL flag is set, the page tables are setup with a special * bit which indicates writes are dropped and all reads return zero. In @@ -625,9 +676,9 @@ struct drm_xe_vm_bind_op { * operations, the BO handle MBZ, and the BO offset MBZ. This flag is * intended to implement VK sparse bindings. */ -#define XE_VM_BIND_FLAG_NULL (0x1 << 19) - /** @op: Operation to perform (lower 16 bits) and flags (upper 16 bits) */ - __u32 op; +#define XE_VM_BIND_FLAG_NULL (0x1 << 3) + /** @flags: Bind flags */ + __u32 flags; /** @mem_region: Memory region to prefetch VMA to, instance not a mask */ __u32 region; @@ -680,21 +731,6 @@ struct drm_xe_vm_bind { __u64 reserved[2]; }; -/** struct drm_xe_ext_exec_queue_set_property - exec queue set property extension */ -struct drm_xe_ext_exec_queue_set_property { - /** @base: base user extension */ - struct xe_user_extension base; - - /** @property: property to set */ - __u32 property; - - /** @pad: MBZ */ - __u32 pad; - - /** @value: property value */ - __u64 value; -}; - /** * struct drm_xe_exec_queue_set_property - exec queue set property * @@ -707,21 +743,14 @@ struct drm_xe_exec_queue_set_property { /** @exec_queue_id: Exec queue ID */ __u32 exec_queue_id; -#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 +#define XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define XE_EXEC_QUEUE_SET_PROPERTY_PREEMPTION_TIMEOUT 2 - /* - * Long running or ULLS engine mode. DMA fences not allowed in this - * mode. Must match the value of DRM_XE_VM_CREATE_COMPUTE_MODE, serves - * as a sanity check the UMD knows what it is doing. Can only be set at - * engine create time. 
- */ -#define XE_EXEC_QUEUE_SET_PROPERTY_COMPUTE_MODE 3 -#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 4 -#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 5 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 6 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 7 -#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 8 +#define XE_EXEC_QUEUE_SET_PROPERTY_PERSISTENCE 3 +#define XE_EXEC_QUEUE_SET_PROPERTY_JOB_TIMEOUT 4 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 5 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 6 +#define XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 7 /** @property: property to set */ __u32 property; @@ -732,24 +761,6 @@ struct drm_xe_exec_queue_set_property { __u64 reserved[2]; }; -/** struct drm_xe_engine_class_instance - instance of an engine class */ -struct drm_xe_engine_class_instance { -#define DRM_XE_ENGINE_CLASS_RENDER 0 -#define DRM_XE_ENGINE_CLASS_COPY 1 -#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2 -#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3 -#define DRM_XE_ENGINE_CLASS_COMPUTE 4 - /* - * Kernel only class (not actual hardware engine class). Used for - * creating ordered queues of VM bind operations. 
- */ -#define DRM_XE_ENGINE_CLASS_VM_BIND 5 - __u16 engine_class; - - __u16 engine_instance; - __u16 gt_id; -}; - struct drm_xe_exec_queue_create { #define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0 /** @extensions: Pointer to the first extension struct, if any */ @@ -878,27 +889,6 @@ struct drm_xe_exec { __u64 reserved[2]; }; -struct drm_xe_mmio { - /** @extensions: Pointer to the first extension struct, if any */ - __u64 extensions; - - __u32 addr; - -#define DRM_XE_MMIO_8BIT 0x0 -#define DRM_XE_MMIO_16BIT 0x1 -#define DRM_XE_MMIO_32BIT 0x2 -#define DRM_XE_MMIO_64BIT 0x3 -#define DRM_XE_MMIO_BITS_MASK 0x3 -#define DRM_XE_MMIO_READ 0x4 -#define DRM_XE_MMIO_WRITE 0x8 - __u32 flags; - - __u64 value; - - /** @reserved: Reserved */ - __u64 reserved[2]; -}; - /** * struct drm_xe_wait_user_fence - wait user fence * @@ -913,18 +903,10 @@ struct drm_xe_wait_user_fence { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - union { - /** - * @addr: user pointer address to wait on, must qword aligned - */ - __u64 addr; - - /** - * @vm_id: The ID of the VM which encounter an error used with - * DRM_XE_UFENCE_WAIT_VM_ERROR. Upper 32 bits must be clear. - */ - __u64 vm_id; - }; + /** + * @addr: user pointer address to wait on, must qword aligned + */ + __u64 addr; #define DRM_XE_UFENCE_WAIT_EQ 0 #define DRM_XE_UFENCE_WAIT_NEQ 1 @@ -937,7 +919,6 @@ struct drm_xe_wait_user_fence { #define DRM_XE_UFENCE_WAIT_SOFT_OP (1 << 0) /* e.g. Wait on VM bind */ #define DRM_XE_UFENCE_WAIT_ABSTIME (1 << 1) -#define DRM_XE_UFENCE_WAIT_VM_ERROR (1 << 2) /** @flags: wait flags */ __u16 flags; @@ -1053,8 +1034,48 @@ struct drm_xe_vm_madvise { __u64 reserved[2]; }; +/** + * DOC: XE PMU event config IDs + * + * Check 'man perf_event_open' to use the ID's XE_PMU_XXXX listed in xe_drm.h + * in 'struct perf_event_attr' as part of perf_event_open syscall to read a + * particular event. + * + * For example to open the XE_PMU_INTERRUPTS(0): + * + * .. 
code-block:: C + * + * struct perf_event_attr attr; + * long long count; + * int cpu = 0; + * int fd; + * + * memset(&attr, 0, sizeof(struct perf_event_attr)); + * attr.type = type; // eg: /sys/bus/event_source/devices/xe_0000_56_00.0/type + * attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; + * attr.use_clockid = 1; + * attr.clockid = CLOCK_MONOTONIC; + * attr.config = XE_PMU_INTERRUPTS(0); + * + * fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + */ + +/* + * Top bits of every counter are GT id. + */ +#define __XE_PMU_GT_SHIFT (56) + +#define ___XE_PMU_OTHER(gt, x) \ + (((__u64)(x)) | ((__u64)(gt) << __XE_PMU_GT_SHIFT)) + +#define XE_PMU_INTERRUPTS(gt) ___XE_PMU_OTHER(gt, 0) +#define XE_PMU_RENDER_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 1) +#define XE_PMU_COPY_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 2) +#define XE_PMU_MEDIA_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 3) +#define XE_PMU_ANY_ENGINE_GROUP_BUSY(gt) ___XE_PMU_OTHER(gt, 4) + #if defined(__cplusplus) } #endif -#endif /* _XE_DRM_H_ */ +#endif /* _XE_DRM_H_ */ \ No newline at end of file