From 4964fb8e44cc8c8e5f8ac5184f538d27f7b0e9f1 Mon Sep 17 00:00:00 2001
From: Young Jin Yoon
Date: Wed, 6 Dec 2023 18:16:29 +0000
Subject: [PATCH] build: update i915 prelim headers

third_party/uapi/prelim/drm from prelim v2.0-rc21
https://github.com/intel-gpu/drm-uapi-helper

Related-To: NEO-8276
Signed-off-by: Young Jin Yoon
---
 third_party/uapi/prelim/drm/i915_drm.h        |   6 +
 third_party/uapi/prelim/drm/i915_drm_prelim.h | 147 ++++++++++++++++--
 .../uapi/prelim/drm/intel_hwconfig_types.h    |   6 +-
 3 files changed, 143 insertions(+), 16 deletions(-)

diff --git a/third_party/uapi/prelim/drm/i915_drm.h b/third_party/uapi/prelim/drm/i915_drm.h
index 229d5f9fdf..aa130a3e0e 100644
--- a/third_party/uapi/prelim/drm/i915_drm.h
+++ b/third_party/uapi/prelim/drm/i915_drm.h
@@ -637,6 +637,12 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
+/*
+ * BUSY_STATS is deprecated on platforms with GuC based submission and will not
+ * be available at all on newer platforms. It has accuracy issues due to the
+ * conversions from tick counts to wall time.
+ * BUSY_TICKS_STATS should be used instead.
+ */
 #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
 /*
  * Indicates the 2k user priority levels are statically mapped into 3 buckets as
diff --git a/third_party/uapi/prelim/drm/i915_drm_prelim.h b/third_party/uapi/prelim/drm/i915_drm_prelim.h
index 3501f3abba..ff29438e82 100644
--- a/third_party/uapi/prelim/drm/i915_drm_prelim.h
+++ b/third_party/uapi/prelim/drm/i915_drm_prelim.h
@@ -66,10 +66,27 @@ struct prelim_i915_user_extension {
 #define PRELIM_UAPI_MINOR	1
 
 /*
- * Top 8 bits of every non-engine counter are GT id.
- * FIXME: __PRELIM_I915_PMU_GT_SHIFT will be changed to 56
+ * Top 4 bits of every non-engine counter are GT id.
  */
 #define __PRELIM_I915_PMU_GT_SHIFT (60)
+#define __PRELIM_I915_PMU_GT_MASK (0xfull << 60)
+#define __PRELIM_I915_PMU_FN_SHIFT (44)
+#define __PRELIM_I915_PMU_FN_MASK (0xffffull << 44)
+
+/*
+ * bits[59:44] are used to specify the function number for the event. Only a PF
+ * is allowed to specify function numbers. If there are N functions ranging from
+ * 0 through N - 1, the following definitions are used for the function numbers:
+ *
+ * 0 : PF
+ * 1 through (N - 1) : VFs
+ *
+ * The macro below can be used to convert an existing event config to a
+ * function event config.
+ */
+#define ___PRELIM_I915_PMU_FN_EVENT(event, function) \
+	(((event) & ~__PRELIM_I915_PMU_FN_MASK) | \
+	((function + 1) << __PRELIM_I915_PMU_FN_SHIFT))
 
 #define ___PRELIM_I915_PMU_OTHER(gt, x) \
 	(((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
@@ -88,16 +105,29 @@ struct prelim_i915_user_extension {
 #define __PRELIM_I915_PMU_COPY_GROUP_BUSY(gt)		___PRELIM_I915_PMU_OTHER(gt, 8)
 #define __PRELIM_I915_PMU_MEDIA_GROUP_BUSY(gt)		___PRELIM_I915_PMU_OTHER(gt, 9)
 #define __PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___PRELIM_I915_PMU_OTHER(gt, 10)
+#define __PRELIM_I915_PMU_TOTAL_ACTIVE_TICKS(gt)	___PRELIM_I915_PMU_OTHER(gt, 11)
 
 #define __PRELIM_I915_PMU_HW_ERROR_EVENT_ID_OFFSET	(__I915_PMU_OTHER(0) + 1000)
 
 #define PRELIM_I915_PMU_ENGINE_RESET_COUNT	__PRELIM_I915_PMU_ENGINE_RESET_COUNT(0)
 #define PRELIM_I915_PMU_EU_ATTENTION_COUNT	__PRELIM_I915_PMU_EU_ATTENTION_COUNT(0)
-#define PRELIM_I915_PMU_RENDER_GROUP_BUSY	__PRELIM_I915_PMU_RENDER_GROUP_BUSY(0)
-#define PRELIM_I915_PMU_COPY_GROUP_BUSY		__PRELIM_I915_PMU_COPY_GROUP_BUSY(0)
-#define PRELIM_I915_PMU_MEDIA_GROUP_BUSY	__PRELIM_I915_PMU_MEDIA_GROUP_BUSY(0)
-#define PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY	__PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_RENDER_GROUP_BUSY		__PRELIM_I915_PMU_RENDER_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_COPY_GROUP_BUSY			__PRELIM_I915_PMU_COPY_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_MEDIA_GROUP_BUSY		__PRELIM_I915_PMU_MEDIA_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY		__PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_TOTAL_ACTIVE_TICKS		__PRELIM_I915_PMU_TOTAL_ACTIVE_TICKS(0)
+
+/*
+ * Note that I915_PMU_SAMPLE_BITS is 4, so a max of 16 events can be sampled
+ * for an engine. For the PRELIM version, start at half of that value.
+ */
+#define PRELIM_I915_SAMPLE_BUSY_TICKS	8
+
+#define PRELIM_I915_PMU_ENGINE_BUSY_TICKS(class, instance) \
+	__I915_PMU_ENGINE(class, instance, PRELIM_I915_SAMPLE_BUSY_TICKS)
+
+#define PRELIM_I915_SCHEDULER_CAP_ENGINE_BUSY_TICKS_STATS	(1ul << 16)
 
 /*
  * HW error counters.
@@ -273,6 +303,73 @@ struct prelim_i915_user_extension {
 #define PRELIM_I915_PARAM_HAS_CHUNK_SIZE	(PRELIM_I915_PARAM | 10)
 /* End getparam */
 
+/*
+ * On large systems, there may be different classes of memory available (NUMA),
+ * each with different performance characteristics and distances from the
+ * individual cores or devices. Using a precise NUMA node for a particular
+ * system buffer can have a significant performance advantage, and which
+ * memory region to use depends on the use case. For example, infrequently used
+ * objects can be stored in DDR whereas frequently used objects should be
+ * in HBM, and in both cases the memory wants to be in a node close to the
+ * core or device that generates the most accesses (at least with respect to
+ * the latency sensitive critical path of the workload).
+ *
+ * To accommodate the varying requirements of buffer placement in NUMA system
+ * memory, we allow passing both preferred node and allowed nodes in a similar
+ * manner to the NUMA mempolicy (numactl --membind), see mbind(2), used to
+ * govern memory allocation within a process. This policy only applies to
+ * system memory placements.
+ *
+ * If no user mempolicy is supplied, by default we will allocate from the memory
+ * node closest to the device (likely DDR). This is the same as using
+ * MPOL_DEFAULT.
+ *
+ * If the policy is set to MPOL_PREFERRED, an attempt is made to allocate
+ * from the specified nodes, before trying the default node associated with
+ * the GPU device.
+ *
+ * If the policy is set to MPOL_BIND, similar to MPOL_PREFERRED, the nodemask
+ * is used to pick which nodes to allocate from, but instead of allowing a
+ * fallback to the default node, it will fail.
+ *
+ * If the policy is set to MPOL_LOCAL, allocation will be performed local to
+ * the CPU, using the closest memory, rather than placing the memory close to
+ * the GPU device.
+ *
+ * If we fail to allocate from the preferred node, the sysfs error_counter
+ * numa%d_allocation is incremented.
+ *
+ * The policy is only applied for the first access of the object in system
+ * memory. If the object is evicted from its preferred node due to swapping,
+ * subsequent accesses to that object will use the system's default allocation
+ * strategy.
+ */
+struct prelim_drm_i915_gem_create_ext_memory_policy {
+	/* .name = PRELIM_I915_GEM_CREATE_EXT_MEMORY_POLICY */
+	struct i915_user_extension base;
+
+	/* Memory policy; how to pick which numa node to use for individual chunk allocations */
+	__u32 mode;
+#define I915_GEM_CREATE_MPOL_DEFAULT		0
+#define I915_GEM_CREATE_MPOL_PREFERRED		1
+#define I915_GEM_CREATE_MPOL_BIND		2
+#define I915_GEM_CREATE_MPOL_INTERLEAVED	3
+#define I915_GEM_CREATE_MPOL_LOCAL		4
+#define I915_GEM_CREATE_MPOL_PREFERRED_MANY	5 /* not implemented */
+
+	/* Placeholder for future flags; must be zero */
+	__u32 flags;
+
+	/* Exclusive maximum of the node ids; the limit of the nodemask */
+	__u32 nodemask_max;
+
+	/*
+	 * Pointer to a bitmask of the numa nodes to use for allocation.
+	 * Size is derived from [0, nodemask_max).
+	 */
+	__u64 nodemask_ptr;
+};
+
 struct prelim_drm_i915_gem_create_ext {
 
 	/**
@@ -289,13 +386,10 @@ struct prelim_drm_i915_gem_create_ext {
 	__u32 handle;
 	__u32 pad;
 
-#define PRELIM_I915_GEM_CREATE_EXT_SETPARAM	(PRELIM_I915_USER_EXT | 1)
+#define PRELIM_I915_GEM_CREATE_EXT_SETPARAM		(PRELIM_I915_USER_EXT | 1)
 #define PRELIM_I915_GEM_CREATE_EXT_PROTECTED_CONTENT	(PRELIM_I915_USER_EXT | 2)
-#define PRELIM_I915_GEM_CREATE_EXT_VM_PRIVATE	(PRELIM_I915_USER_EXT | 3)
-#define PRELIM_I915_GEM_CREATE_EXT_FLAGS_UNKNOWN			\
-	(~(PRELIM_I915_GEM_CREATE_EXT_SETPARAM |			\
-	   PRELIM_I915_GEM_CREATE_EXT_PROTECTED_CONTENT |		\
-	   PRELIM_I915_GEM_CREATE_EXT_VM_PRIVATE))
+#define PRELIM_I915_GEM_CREATE_EXT_VM_PRIVATE		(PRELIM_I915_USER_EXT | 3)
+#define PRELIM_I915_GEM_CREATE_EXT_MEMORY_POLICY	(PRELIM_I915_USER_EXT | 4)
 	__u64 extensions;
 };
 
@@ -708,6 +802,15 @@ enum prelim_drm_i915_perf_property_id {
 	 */
 	PRELIM_DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE = (PRELIM_DRM_I915_PERF_PROP | 3),
 
+	/**
+	 * Specify the number of reports that the driver will wait for before
+	 * returning data to the user. This value must be less than the number
+	 * of reports that the OA buffer can hold.
+	 *
+	 * This property is available in perf revision 1008.
+	 */
+	PRELIM_DRM_I915_PERF_PROP_OA_NOTIFY_NUM_REPORTS = (PRELIM_DRM_I915_PERF_PROP | 4),
+
 	PRELIM_DRM_I915_PERF_PROP_LAST,
 
 	PRELIM_DRM_I915_PERF_PROP_MAX = DRM_I915_PERF_PROP_MAX - 1 + \
@@ -1343,18 +1446,18 @@ struct prelim_drm_i915_gem_vm_advise {
	 * objects and will return error (-EPERM). Hints may only be set against
	 * the exported object,
	 *
-	 * ATOMIC_SYSTEM
+	 * ADVISE_ATOMIC_SYSTEM
	 *    inform that atomic access is enabled for both CPU and GPU.
	 *    For some platforms, this may be required for correctness
	 *    and this hint will influence migration policy.
	 *    This hint is not allowed unless placement list includes SMEM,
	 *    and is not allowed for exported buffer objects (prime_export)
	 *    with placement list of LMEM + SMEM and returns error (-EPERM).
-	 * ATOMIC_DEVICE
+	 * ADVISE_ATOMIC_DEVICE
	 *    inform that atomic access is enabled for GPU devices. For
	 *    some platforms, this may be required for correctness and
	 *    this hint will influence migration policy.
-	 * ATOMIC_NONE
+	 * ADVISE_ATOMIC_NONE
	 *    clears above ATOMIC_SYSTEM / ATOMIC_DEVICE hint.
	 * PREFERRED_LOCATION
	 *    sets the preferred memory class and instance for this object's
@@ -1372,6 +1475,20 @@ struct prelim_drm_i915_gem_vm_advise {
 #define PRELIM_I915_VM_ADVISE_ATOMIC_NONE		(PRELIM_I915_VM_ADVISE | 0)
 #define PRELIM_I915_VM_ADVISE_ATOMIC_SYSTEM		(PRELIM_I915_VM_ADVISE | 1)
 #define PRELIM_I915_VM_ADVISE_ATOMIC_DEVICE		(PRELIM_I915_VM_ADVISE | 2)
+	/**
+	 * Attributes to apply to an address range or buffer object. Unlike the
+	 * ADVISE_ATOMIC values above, SET_ATOMIC guarantees the correctness of
+	 * atomic operations rather than acting as a hint/advise. This mainly
+	 * fixes the naming problem of the above API: atomics should be a
+	 * guarantee, not a hint/advise.
+	 *
+	 * Since the driver already implements the ADVISE_ATOMIC APIs as a
+	 * guarantee, the SET_ATOMIC APIs are defined with the same behavior
+	 * as the ADVISE_ATOMIC APIs.
+	 */
+#define PRELIM_I915_VM_SET_ATOMIC_NONE		PRELIM_I915_VM_ADVISE_ATOMIC_NONE
+#define PRELIM_I915_VM_SET_ATOMIC_SYSTEM	PRELIM_I915_VM_ADVISE_ATOMIC_SYSTEM
+#define PRELIM_I915_VM_SET_ATOMIC_DEVICE	PRELIM_I915_VM_ADVISE_ATOMIC_DEVICE
 #define PRELIM_I915_VM_ADVISE_PREFERRED_LOCATION	(PRELIM_I915_VM_ADVISE | 3)
 
	/** Preferred location (memory region) for object backing */
diff --git a/third_party/uapi/prelim/drm/intel_hwconfig_types.h b/third_party/uapi/prelim/drm/intel_hwconfig_types.h
index 3994ddbf9e..acbcc5918e 100644
--- a/third_party/uapi/prelim/drm/intel_hwconfig_types.h
+++ b/third_party/uapi/prelim/drm/intel_hwconfig_types.h
@@ -13,7 +13,7 @@
  * the static hardware configuration for that platform.
  * This header defines the current attribute keys for this KLV.
  */
-enum intel_hwconfig {
+enum intel_hwconfig_keys {
	INTEL_HWCONFIG_MAX_SLICES_SUPPORTED = 1,
	INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED,	/* 2 */
	INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS,		/* 3 */
@@ -95,6 +95,10 @@ enum intel_hwconfig {
	INTEL_HWCONFIG_NUM_HBM_STACKS_PER_TILE,		/* 74 */
	INTEL_HWCONFIG_NUM_CHANNELS_PER_HBM_STACK,	/* 75 */
	INTEL_HWCONFIG_HBM_CHANNEL_WIDTH_IN_BYTES,	/* 76 */
+	INTEL_HWCONFIG_MIN_TASK_URB_ENTRIES,		/* 77 */
+	INTEL_HWCONFIG_MAX_TASK_URB_ENTRIES,		/* 78 */
+	INTEL_HWCONFIG_MIN_MESH_URB_ENTRIES,		/* 79 */
+	INTEL_HWCONFIG_MAX_MESH_URB_ENTRIES,		/* 80 */
 	__INTEL_HWCONFIG_MAX
 };
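
For reference, here is a minimal userspace sketch (illustrative only, not part of the applied patch) of how the new PMU additions might be consumed: it opens one of the busy-ticks engine counters via perf_event_open(2) and optionally applies the ___PRELIM_I915_PMU_FN_EVENT() conversion when a function number is given on the command line. It assumes the updated header is reachable on the include path as "i915_drm_prelim.h", that the i915 PMU type id is read from sysfs, and that whether a given event accepts a function number follows the PF/VF rules documented in the header, not this sketch.

/* Illustrative sketch, minimal error handling. */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#include "i915_drm_prelim.h"

static uint32_t i915_pmu_type(void)
{
	/* The i915 PMU advertises its perf "type" id in sysfs. */
	FILE *f = fopen("/sys/bus/event_source/devices/i915/type", "r");
	unsigned int type = 0;

	if (f) {
		if (fscanf(f, "%u", &type) != 1)
			type = 0;
		fclose(f);
	}
	return type;
}

int main(int argc, char **argv)
{
	struct perf_event_attr attr;
	uint64_t config, value;
	int fd;

	/* Busy ticks for the render engine class, instance 0 (GT 0). */
	config = PRELIM_I915_PMU_ENGINE_BUSY_TICKS(I915_ENGINE_CLASS_RENDER, 0);

	/*
	 * Optional, PF-only per the header comment: redirect the counter at a
	 * specific function number (0 = PF, 1 through N - 1 = VFs). strtoull()
	 * keeps the shift by __PRELIM_I915_PMU_FN_SHIFT in 64-bit arithmetic.
	 */
	if (argc > 1)
		config = ___PRELIM_I915_PMU_FN_EVENT(config, strtoull(argv[1], NULL, 0));

	memset(&attr, 0, sizeof(attr));
	attr.type = i915_pmu_type();
	attr.size = sizeof(attr);
	attr.config = config;

	/* Uncore PMU: system-wide counter, pid == -1, pinned to one CPU. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	if (read(fd, &value, sizeof(value)) == (ssize_t)sizeof(value))
		printf("busy ticks: %llu\n", (unsigned long long)value);

	close(fd);
	return 0;
}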