build: update i915 prelim headers

third_party/uapi/prelim/drm from prelim v2.0-rc21 https://github.com/intel-gpu/drm-uapi-helper Related-To: NEO-8276 Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
2025-12-19 06:24:51 +08:00 · 2023-12-06 18:16:29 +00:00
parent a064388ba8
commit 4964fb8e44
3 changed files with 143 additions and 16 deletions
--- a/third_party/uapi/prelim/drm/i915_drm.h
+++ b/third_party/uapi/prelim/drm/i915_drm.h
@@ -637,6 +637,12 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
+/*
+ * BUSY_STATS is deprecated on platforms with GuC based submission and will not
+ * be available at all on newer platforms. It has accuracy issues due to the
+ * conversions from tick counts to wall time.
+ * BUSY_TICKS_STATS should be used instead.
+ */
 #define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
 /*
 * Indicates the 2k user priority levels are statically mapped into 3 buckets as
--- a/third_party/uapi/prelim/drm/i915_drm_prelim.h
+++ b/third_party/uapi/prelim/drm/i915_drm_prelim.h
@@ -66,10 +66,27 @@ struct prelim_i915_user_extension {
 #define PRELIM_UAPI_MINOR	1

 /*
- * Top 8 bits of every non-engine counter are GT id.
- * FIXME: __PRELIM_I915_PMU_GT_SHIFT will be changed to 56
+ * Top 4 bits of every non-engine counter are GT id.
 */
 #define __PRELIM_I915_PMU_GT_SHIFT (60)
+#define __PRELIM_I915_PMU_GT_MASK (0xfull << 60)
+#define __PRELIM_I915_PMU_FN_SHIFT (44)
+#define __PRELIM_I915_PMU_FN_MASK (0xffffull << 44)
+
+/*
+ * bits[59:44] are used to specify the function number for the event. Only a PF
+ * is allowed to specify function numbers. If there are N functions ranging from
+ * 0 through N - 1, the following definitions are used for the function numbers:
+ *
+ * 0 : PF
+ * 1 through (N - 1) : VFs
+ *
+ * Below macro can be used to convert an existing event config to a function
+ * event config.
+ */
+#define ___PRELIM_I915_PMU_FN_EVENT(event, function) \
+	(((event) & ~__PRELIM_I915_PMU_FN_MASK) | \
+	 ((function + 1) << __PRELIM_I915_PMU_FN_SHIFT))

 #define ___PRELIM_I915_PMU_OTHER(gt, x) \
 	(((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
@@ -88,16 +105,29 @@ struct prelim_i915_user_extension {
 #define __PRELIM_I915_PMU_COPY_GROUP_BUSY(gt)		___PRELIM_I915_PMU_OTHER(gt, 8)
 #define __PRELIM_I915_PMU_MEDIA_GROUP_BUSY(gt)		___PRELIM_I915_PMU_OTHER(gt, 9)
 #define __PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY(gt)	___PRELIM_I915_PMU_OTHER(gt, 10)
+#define __PRELIM_I915_PMU_TOTAL_ACTIVE_TICKS(gt)	___PRELIM_I915_PMU_OTHER(gt, 11)


 #define __PRELIM_I915_PMU_HW_ERROR_EVENT_ID_OFFSET	(__I915_PMU_OTHER(0) + 1000)

 #define PRELIM_I915_PMU_ENGINE_RESET_COUNT	__PRELIM_I915_PMU_ENGINE_RESET_COUNT(0)
 #define PRELIM_I915_PMU_EU_ATTENTION_COUNT	__PRELIM_I915_PMU_EU_ATTENTION_COUNT(0)
-#define PRELIM_I915_PMU_RENDER_GROUP_BUSY              __PRELIM_I915_PMU_RENDER_GROUP_BUSY(0)
-#define PRELIM_I915_PMU_COPY_GROUP_BUSY                __PRELIM_I915_PMU_COPY_GROUP_BUSY(0)
-#define PRELIM_I915_PMU_MEDIA_GROUP_BUSY               __PRELIM_I915_PMU_MEDIA_GROUP_BUSY(0)
-#define PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY          __PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_RENDER_GROUP_BUSY		__PRELIM_I915_PMU_RENDER_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_COPY_GROUP_BUSY			__PRELIM_I915_PMU_COPY_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_MEDIA_GROUP_BUSY		__PRELIM_I915_PMU_MEDIA_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY		__PRELIM_I915_PMU_ANY_ENGINE_GROUP_BUSY(0)
+#define PRELIM_I915_PMU_TOTAL_ACTIVE_TICKS		__PRELIM_I915_PMU_TOTAL_ACTIVE_TICKS(0)
+
+/*
+ * Note that I915_PMU_SAMPLE_BITS is 4 so a max of 16 events can be sampled for
+ * an engine. For the PRELIM version start at half of that value.
+ */
+#define PRELIM_I915_SAMPLE_BUSY_TICKS 8
+
+#define PRELIM_I915_PMU_ENGINE_BUSY_TICKS(class, instance) \
+	__I915_PMU_ENGINE(class, instance, PRELIM_I915_SAMPLE_BUSY_TICKS)
+
+#define  PRELIM_I915_SCHEDULER_CAP_ENGINE_BUSY_TICKS_STATS	(1ul << 16)

 /*
 * HW error counters.
@@ -273,6 +303,73 @@ struct prelim_i915_user_extension {
 #define PRELIM_I915_PARAM_HAS_CHUNK_SIZE	(PRELIM_I915_PARAM | 10)
 /* End getparam */

+/*
+ * On large systems, there may be different classes of memory available (NUMA),
+ * each with different performance characteristics and distances from the
+ * individual cores or devices. Using a precise NUMA node for a particular
+ * system buffer can have a significant performance advantage, and which
+ * memory region to use depends on the use case. For example, infrequently used
+ * objects can be stored in DDR whereas the frequently used objects should be
+ * in HBM, and in both cases want to be in a memory node closed to the core or
+ * device predominating generating the most accesses (at least with respect to
+ * the latency sensitive critical path of the workload).
+ *
+ * To accommodate the varying requirements of buffer placement in NUMA system
+ * memory, we allow passing both preferred node and allowed nodes in a similar
+ * manner to the NUMA mempolicy (numactl --membind), see mbind(2), used to
+ * govern memory allocation within a process. This policy only applies to
+ * system memory placements.
+ *
+ * If no user mempolicy is supplied, by default we will allocate from the memory
+ * node closest to the device (likely DDR). This is the same as using
+ * MPOL_DEFAULT.
+ *
+ * If the policy is set to MPOL_PREFERRED, an attempt is made to allocate
+ * from the specified nodes, before trying the default node associated with
+ * the GPU device.
+ *
+ * If the policy is set to MPOL_BIND, similar to MPOL_PREFFERED, the nodemask
+ * is used to pick which nodes to allocate from, but instead of allowing a
+ * fallback to the default node, it will fail.
+ *
+ * If the policy is set to MPOL_LOCAL, allocation will be performed local to
+ * the CPU, using the closest memory, rather than placing the memory close to
+ * the GPU device.
+ *
+ * If we fail to allocate from the preferred node, the sysfs error_counter
+ * numa%d_allocation is incremented.
+ *
+ * The policy is only applied for the first access of the object in system
+ * memory. If the object is evicted from its preferred node, due to swapping,
+ * subsequent access to that object will use the system's default allocation
+ * strategy.
+ */
+struct prelim_drm_i915_gem_create_ext_memory_policy {
+	/* .name = PRELIM_I915_GEM_CREATE_EXT_MEMORY_POLICY */
+	struct i915_user_extension base;
+
+	/* Memory policy; how to pick which numa node for individual chunk allocations */
+	__u32 mode;
+#define I915_GEM_CREATE_MPOL_DEFAULT		0
+#define I915_GEM_CREATE_MPOL_PREFERRED		1
+#define I915_GEM_CREATE_MPOL_BIND		2
+#define I915_GEM_CREATE_MPOL_INTERLEAVED	3
+#define I915_GEM_CREATE_MPOL_LOCAL		4
+#define I915_GEM_CREATE_MPOL_PREFERRED_MANY	5 /* not implemented */
+
+	/* Placehoder for future flags; must be zero */
+	__u32 flags;
+
+	/* Exclusive maximum of the node ids; the limit of the nodemask */
+	__u32 nodemask_max;
+
+	/*
+	 * Pointer to a bitmask of the numa nodes to use for allocation.
+	 * Size is derived from [0, nodemask_max).
+	 */
+	__u64 nodemask_ptr;
+};
+
 struct prelim_drm_i915_gem_create_ext {

 	/**
@@ -289,13 +386,10 @@ struct prelim_drm_i915_gem_create_ext {
 	__u32 handle;
 	__u32 pad;

-#define PRELIM_I915_GEM_CREATE_EXT_SETPARAM	(PRELIM_I915_USER_EXT | 1)
+#define PRELIM_I915_GEM_CREATE_EXT_SETPARAM		(PRELIM_I915_USER_EXT | 1)
 #define PRELIM_I915_GEM_CREATE_EXT_PROTECTED_CONTENT   (PRELIM_I915_USER_EXT | 2)
-#define PRELIM_I915_GEM_CREATE_EXT_VM_PRIVATE	(PRELIM_I915_USER_EXT | 3)
-#define PRELIM_I915_GEM_CREATE_EXT_FLAGS_UNKNOWN			\
-	(~(PRELIM_I915_GEM_CREATE_EXT_SETPARAM |			\
-	   PRELIM_I915_GEM_CREATE_EXT_PROTECTED_CONTENT |		\
-	   PRELIM_I915_GEM_CREATE_EXT_VM_PRIVATE))
+#define PRELIM_I915_GEM_CREATE_EXT_VM_PRIVATE		(PRELIM_I915_USER_EXT | 3)
+#define PRELIM_I915_GEM_CREATE_EXT_MEMORY_POLICY	(PRELIM_I915_USER_EXT | 4)
 	__u64 extensions;
 };

@@ -708,6 +802,15 @@ enum prelim_drm_i915_perf_property_id {
 	 */
 	PRELIM_DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE = (PRELIM_DRM_I915_PERF_PROP | 3),

+	/**
+	 * Specify the number of reports that the driver will wait for before
+	 * returning data to the user. This value must be less than the number
+	 * of reports that the OA buffer can hold.
+	 *
+	 * This property is available in perf revision 1008.
+	 */
+	PRELIM_DRM_I915_PERF_PROP_OA_NOTIFY_NUM_REPORTS = (PRELIM_DRM_I915_PERF_PROP | 4),
+
 	PRELIM_DRM_I915_PERF_PROP_LAST,

 	PRELIM_DRM_I915_PERF_PROP_MAX = DRM_I915_PERF_PROP_MAX - 1 + \
@@ -1343,18 +1446,18 @@ struct prelim_drm_i915_gem_vm_advise {
 	 * objects and will return error (-EPERM). Hints may only be set against
 	 * the exported object,
 	 *
-	 * ATOMIC_SYSTEM
+	 * ADVISE_ATOMIC_SYSTEM
 	 *      inform that atomic access is enabled for both CPU and GPU.
 	 *      For some platforms, this may be required for correctness
 	 *      and this hint will influence migration policy.
 	 *      This hint is not allowed unless placement list includes SMEM,
 	 *      and is not allowed for exported buffer objects (prime_export)
 	 *	with placement list of LMEM + SMEM and returns error (-EPERM).
-	 * ATOMIC_DEVICE
+	 * ADVISE_ATOMIC_DEVICE
 	 *      inform that atomic access is enabled for GPU devices. For
 	 *      some platforms, this may be required for correctness and
 	 *      this hint will influence migration policy.
-	 * ATOMIC_NONE
+	 * ADVISE_ATOMIC_NONE
 	 *	clears above ATOMIC_SYSTEM / ATOMIC_DEVICE hint.
 	 * PREFERRED_LOCATION
 	 *	sets the preferred memory class and instance for this object's
@@ -1372,6 +1475,20 @@ struct prelim_drm_i915_gem_vm_advise {
 #define PRELIM_I915_VM_ADVISE_ATOMIC_NONE		(PRELIM_I915_VM_ADVISE | 0)
 #define PRELIM_I915_VM_ADVISE_ATOMIC_SYSTEM		(PRELIM_I915_VM_ADVISE | 1)
 #define PRELIM_I915_VM_ADVISE_ATOMIC_DEVICE		(PRELIM_I915_VM_ADVISE | 2)
+	/**
+	 * Attributes to apply to address range or buffer object. Different from
+	 * above ADVISE_ATOMIC, SET_ATOMIC guarantees the correctness of atomic
+	 * operation instead of just a hint/advise. This is mainly to fix the
+	 * naming problem of above API: atomics should be a guarantee, not a
+	 * hint/advise.
+	 *
+	 * Since currently driver already implements ADVISE_ATOMIC APIs as a
+	 * guarantee, so just define SET_ATOMIC APIs the same behavior as
+	 * ADVISE_ATOMIC APIs.
+	 */
+#define PRELIM_I915_VM_SET_ATOMIC_NONE		PRELIM_I915_VM_ADVISE_ATOMIC_NONE
+#define PRELIM_I915_VM_SET_ATOMIC_SYSTEM	PRELIM_I915_VM_ADVISE_ATOMIC_SYSTEM
+#define PRELIM_I915_VM_SET_ATOMIC_DEVICE	PRELIM_I915_VM_ADVISE_ATOMIC_DEVICE
 #define PRELIM_I915_VM_ADVISE_PREFERRED_LOCATION	(PRELIM_I915_VM_ADVISE | 3)

 	/** Preferred location (memory region) for object backing */
--- a/third_party/uapi/prelim/drm/intel_hwconfig_types.h
+++ b/third_party/uapi/prelim/drm/intel_hwconfig_types.h
@@ -13,7 +13,7 @@
 * the static hardware configuration for that platform.
 * This header defines the current attribute keys for this KLV.
 */
-enum intel_hwconfig {
+enum intel_hwconfig_keys {
 	INTEL_HWCONFIG_MAX_SLICES_SUPPORTED = 1,
 	INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED,		/* 2 */
 	INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS,			/* 3 */
@@ -95,6 +95,10 @@ enum intel_hwconfig {
 	INTEL_HWCONFIG_NUM_HBM_STACKS_PER_TILE,			/* 74 */
 	INTEL_HWCONFIG_NUM_CHANNELS_PER_HBM_STACK,		/* 75 */
 	INTEL_HWCONFIG_HBM_CHANNEL_WIDTH_IN_BYTES,		/* 76 */
+	INTEL_HWCONFIG_MIN_TASK_URB_ENTRIES,			/* 77 */
+	INTEL_HWCONFIG_MAX_TASK_URB_ENTRIES,			/* 78 */
+	INTEL_HWCONFIG_MIN_MESH_URB_ENTRIES,			/* 79 */
+	INTEL_HWCONFIG_MAX_MESH_URB_ENTRIES,			/* 80 */
 	__INTEL_HWCONFIG_MAX
 };