Update prelim uapi headers

https://github.com/intel-gpu/drm-uapi-helper/tree/v2.0-rc7

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2022-01-26 10:53:43 +00:00
committed by Compute-Runtime-Automation
parent c9df6f6f39
commit 7dc89fea78
2 changed files with 175 additions and 270 deletions

View File

@@ -623,6 +623,7 @@ typedef struct drm_i915_irq_wait {
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5)
#define I915_PARAM_HUC_STATUS 42
@@ -1763,7 +1764,7 @@ struct drm_i915_gem_context_param {
* Extensions:
* i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
* i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
* prelim_i915_context_engines_parallel_submit (PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
* i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
*/
#define I915_CONTEXT_PARAM_ENGINES 0xa
@@ -1942,10 +1943,140 @@ struct i915_context_engines_bond {
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
/**
* struct i915_context_engines_parallel_submit - Configure engine for
* parallel submission.
*
* Setup a slot in the context engine map to allow multiple BBs to be submitted
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
* in parallel. Multiple hardware contexts are created internally in the i915 to
* run these BBs. Once a slot is configured for N BBs only N BBs can be
* submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
* doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
* many BBs there are based on the slot's configuration. The N BBs are the last
* N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
*
* The default placement behavior is to create implicit bonds between each
* context if each context maps to more than 1 physical engine (e.g. context is
* a virtual engine). Also we only allow contexts of same engine class and these
* contexts must be in logically contiguous order. Examples of the placement
* behavior are described below. Lastly, the default is to not allow BBs to be
* preempted mid-batch. Rather insert coordinated preemption points on all
* hardware contexts between each set of BBs. Flags could be added in the future
* to change both of these default behaviors.
*
* Returns -EINVAL if hardware context placement configuration is invalid or if
* the placement configuration isn't supported on the platform / submission
* interface.
* Returns -ENODEV if extension isn't supported on the platform / submission
* interface.
*
* .. code-block:: none
*
* Examples syntax:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
*
* Example 1 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=1,
* engines=CS[0],CS[1])
*
* Results in the following valid placement:
* CS[0], CS[1]
*
* Example 2 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[2],CS[1],CS[3])
*
* Results in the following valid placements:
* CS[0], CS[1]
* CS[2], CS[3]
*
* This can be thought of as two virtual engines, each containing two
* engines thereby making a 2D array. However, there are bonds tying the
* entries together and placing restrictions on how they can be scheduled.
* Specifically, the scheduler can choose only vertical columns from the 2D
* array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
* scheduler wants to submit to CS[0], it must also choose CS[1] and vice
* versa. Same for CS[2] requires also using CS[3].
* VE[0] = CS[0], CS[2]
* VE[1] = CS[1], CS[3]
*
* Example 3 pseudo code:
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[1],CS[1],CS[3])
*
* Results in the following valid and invalid placements:
* CS[0], CS[1]
* CS[1], CS[3] - Not logically contiguous, return -EINVAL
*/
struct i915_context_engines_parallel_submit {
/**
* @base: base user extension.
*/
struct i915_user_extension base;
/**
* @engine_index: slot for parallel engine
*/
__u16 engine_index;
/**
* @width: number of contexts per parallel engine or in other words the
* number of batches in each submission
*/
__u16 width;
/**
* @num_siblings: number of siblings per context or in other words the
* number of possible placements for each submission
*/
__u16 num_siblings;
/**
* @mbz16: reserved for future use; must be zero
*/
__u16 mbz16;
/**
* @flags: all undefined flags must be zero, currently not defined flags
*/
__u64 flags;
/**
* @mbz64: reserved for future use; must be zero
*/
__u64 mbz64[3];
/**
* @engines: 2-d array of engine instances to configure parallel engine
*
* length = width (i) * num_siblings (j)
* index = j + i * num_siblings
*/
struct i915_engine_class_instance engines[0];
} __attribute__((packed));
#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
struct i915_user_extension base; \
__u16 engine_index; \
__u16 width; \
__u16 num_siblings; \
__u16 mbz16; \
__u64 flags; \
__u64 mbz64[3]; \
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
struct i915_context_param_engines {
__u64 extensions; /* linked chain of extension blocks, 0 terminates */
#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
struct i915_engine_class_instance engines[0];
} __attribute__((packed));
@@ -2435,14 +2566,20 @@ struct drm_i915_engine_info {
/** @flags: Engine flags. */
__u64 flags;
#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE (1 << 0)
/** @capabilities: Capabilities of this engine. */
__u64 capabilities;
#define I915_VIDEO_CLASS_CAPABILITY_HEVC (1 << 0)
#define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC (1 << 1)
/** @logical_instance: Logical instance of engine */
__u16 logical_instance;
/** @rsvd1: Reserved fields. */
__u64 rsvd1[4];
__u16 rsvd1[3];
/** @rsvd2: Reserved fields. */
__u64 rsvd2[3];
};
/**
@@ -2521,4 +2658,4 @@ struct drm_i915_query_perf_config {
}
#endif
#endif /* _I915_DRM_H_ */
#endif /* _I915_DRM_H_ */

View File

@@ -42,6 +42,7 @@ struct prelim_i915_uevent {
struct prelim_i915_user_extension {
#define PRELIM_I915_USER_EXT (1 << 16)
#define PRELIM_I915_USER_EXT_MASK(x) (x & 0xffff)
#define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3)
};
struct prelim_drm_i915_gem_context_create_ext_clone {
@@ -469,274 +470,8 @@ enum prelim_drm_i915_gem_engine_class {
PRELIM_I915_ENGINE_CLASS_COMPUTE = 4,
};
/*
* prelim_i915_context_engines_parallel_submit:
*
* Setup a gem context to allow multiple BBs to be submitted in a single execbuf
* IOCTL. Those BBs will then be scheduled to run on the GPU in parallel.
*
* All hardware contexts in the engine set are configured for parallel
* submission (i.e. once this gem context is configured for parallel submission,
* all the hardware contexts, regardless if a BB is available on each individual
* context, will be submitted to the GPU in parallel). A user can submit BBs to
* subset (or superset) of the hardware contexts, in a single execbuf IOCTL, but
* it is not recommended as it may reserve physical engines with nothing to run
* on them. Highly recommended to configure the gem context with N hardware
* contexts then always submit N BBs in a single IOCTL.
*
* Their are two currently defined ways to control the placement of the
* hardware contexts on physical engines: default behavior (no flags) and
* PRELIM_I915_PARALLEL_IMPLICT_BONDS (a flag). More flags may be added the in the
* future as new hardware / use cases arise. Details of how to use this
* interface below above the flags.
*
* Returns -EINVAL if hardware context placement configuration invalid or if the
* placement configuration isn't supported on the platform / submission
* interface.
*/
struct prelim_i915_context_engines_parallel_submit {
struct i915_user_extension base;
/*
* Default placement behvavior (currently unsupported):
*
* Rather than restricting parallel submission to a single class with a
* logically contiguous placement (PRELIM_I915_PARALLEL_IMPLICT_BONDS), add a mode that
* enables parallel submission across multiple engine classes. In this case each
* context's logical engine mask indicates where that context can placed
* compared to the flag, PRELIM_I915_PARALLEL_IMPLICT_BONDS, where only the first
* context's logical mask controls the placement. It is implied in this mode
* that all contexts have mutual exclusive placement (e.g. if one context is
* running VCS0 no other contexts can run on VCS0).
*
* Example 1 pseudo code:
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID, INVALID)
* set_load_balance(engine_index=0, num_siblings=4, engines=VCS0,VCS1,VCS2,VCS3)
* set_load_balance(engine_index=1, num_siblings=4, engines=RCS0,RCS1,RCS2,RCS3)
* set_parallel()
*
* Results in the following valid placements:
* VCS0, RCS0
* VCS0, RCS1
* VCS0, RCS2
* VCS0, RCS3
* VCS1, RCS0
* VCS1, RCS1
* VCS1, RCS2
* VCS1, RCS3
* VCS2, RCS0
* VCS2, RCS1
* VCS2, RCS2
* VCS2, RCS3
* VCS3, RCS0
* VCS3, RCS1
* VCS3, RCS2
* VCS3, RCS3
*
* Example 2 pseudo code:
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID, INVALID)
* set_load_balance(engine_index=0, num_siblings=3, engines=VCS0,VCS1,VCS2)
* set_load_balance(engine_index=1, num_siblings=3, engines=VCS0,VCS1,VCS2)
* set_parallel()
*
* Results in the following valid placements:
* VCS0, VCS1
* VCS0, VCS2
* VCS1, VCS0
* VCS1, VCS2
* VCS2, VCS0
* VCS2, VCS1
*
* This enables a use case where all engines are created equally, we don't care
* where they are scheduled, we just want a certain number of resources, for
* those resources to be scheduled in parallel, and possibly across multiple
* engine classes.
*
* This mode is not supported with GuC submission gen12 or any prior platforms,
* but could be supported in execlists mode. Future GuC platforms may support
* this.
*/
/*
* PRELIM_I915_PARALLEL_IMPLICT_BONDS - Create implict bonds between each context.
* Each context must have the same number sibling and bonds are implictly create
* of the siblings.
*
* All of the below examples are in logical space.
*
* Example 1 pseudo code:
* set_engines(VCS0, VCS1)
* set_parallel(flags=PRELIM_I915_PARALLEL_IMPLICT_BONDS)
*
* Results in the following valid placements:
* VCS0, VCS1
*
* Example 2 pseudo code:
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID, INVALID)
* set_load_balance(engine_index=0, num_siblings=4, engines=VCS0,VCS2,VCS4,VCS6)
* set_load_balance(engine_index=1, num_siblings=4, engines=VCS1,VCS3,VCS5,VCS7)
* set_parallel(flags=PRELIM_I915_PARALLEL_IMPLICT_BONDS)
*
* Results in the following valid placements:
* VCS0, VCS1
* VCS2, VCS3
* VCS4, VCS5
* VCS6, VCS7
*
* Example 3 pseudo code:
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID, INVALID, INVALID, INVALID)
* set_load_balance(engine_index=0, num_siblings=2, engines=VCS0,VCS4)
* set_load_balance(engine_index=1, num_siblings=2, engines=VCS1,VCS5)
* set_load_balance(engine_index=2, num_siblings=2, engines=VCS2,VCS6)
* set_load_balance(engine_index=3, num_siblings=2, engines=VCS3,VCS7)
* set_parallel(flags=PRELIM_I915_PARALLEL_IMPLICT_BONDS)
*
* Results in the following valid placements:
* VCS0, VCS1, VCS2, VCS3
* VCS4, VCS5, VCS6, VCS7
*
* This enables a use case where all engines are not equal and certain placement
* rules are required (i.e. split-frame requires all contexts to be placed in a
* logically contiguous order on the VCS engines on gen11/gen12 platforms). This
* use case (logically contiguous placement, within a single engine class) is
* supported when using GuC submission. Execlist mode could support all possible
* bonding configurations.
*/
#define PRELIM_I915_PARALLEL_IMPLICT_BONDS (1ull << 63)
/*
* Do not allow BBs to be preempted mid BB rather insert coordinated preemption
* points on all hardware contexts between each BB. An example use case of this
* feature is split-frame on gen11 or gen12 hardware. When using this feature a
* BB must be submitted on each hardware context in the parallel gem context.
* The execbuf2 IOCTL enforces the user adheres to policy.
*/
#define PRELIM_I915_PARALLEL_BATCH_PREEMPT_BOUNDARY (1ull << 62)
#define __PRELIM_I915_PARALLEL_UNKNOWN_FLAGS (~GENMASK_ULL(63, 62))
__u64 flags; /* all undefined flags must be zero */
__u64 mbz64[4]; /* reserved for future use; must be zero */
} __attribute__ ((packed));
/**
* struct prelim_drm_i915_context_engines_parallel2_submit - Configure engine
* for parallel submission.
*
* Setup a slot in the context engine map to allow multiple BBs to be submitted
* in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
* in parallel. Multiple hardware contexts are created internally in the i915
* run these BBs. Once a slot is configured for N BBs only N BBs can be
* submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
* doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
* many BBs there are based on the slot's configuration. The N BBs are the last
* N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
*
* The default placement behavior is to create implicit bonds between each
* context if each context maps to more than 1 physical engine (e.g. context is
* a virtual engine). Also we only allow contexts of same engine class and these
* contexts must be in logically contiguous order. Examples of the placement
* behavior described below. Lastly, the default is to not allow BBs to
* preempted mid BB rather insert coordinated preemption on all hardware
* contexts between each set of BBs. Flags may be added in the future to change
* both of these default behaviors.
*
* Returns -EINVAL if hardware context placement configuration is invalid or if
* the placement configuration isn't supported on the platform / submission
* interface.
* Returns -ENODEV if extension isn't supported on the platform / submission
* inteface.
*
* .. code-block::
*
* Example 1 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=1,
* engines=CS[0],CS[1])
*
* Results in the following valid placement:
* CS[0], CS[1]
*
* Example 2 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[2],CS[1],CS[3])
*
* Results in the following valid placements:
* CS[0], CS[1]
* CS[2], CS[3]
*
* This can also be thought of as 2 virtual engines described by 2-D array
* in the engines the field with bonds placed between each index of the
* virtual engines. e.g. CS[0] is bonded to CS[1], CS[2] is bonded to
* CS[3].
* VE[0] = CS[0], CS[2]
* VE[1] = CS[1], CS[3]
*
* Example 3 pseudo code:
* CS[X] = generic engine of same class, logical instance X
* INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
* set_engines(INVALID)
* set_parallel(engine_index=0, width=2, num_siblings=2,
* engines=CS[0],CS[1],CS[1],CS[3])
*
* Results in the following valid and invalid placements:
* CS[0], CS[1]
* CS[1], CS[3] - Not logical contiguous, return -EINVAL
*/
struct prelim_drm_i915_context_engines_parallel2_submit {
/**
* @base: base user extension.
*/
struct i915_user_extension base;
/**
* @engine_index: slot for parallel engine
*/
__u16 engine_index;
/**
* @width: number of contexts per parallel engine
*/
__u16 width;
/**
* @num_siblings: number of siblings per context
*/
__u16 num_siblings;
/**
* @mbz16: reserved for future use; must be zero
*/
__u16 mbz16;
/**
* @flags: all undefined flags must be zero, currently not defined flags
*/
__u64 flags;
/**
* @mbz64: reserved for future use; must be zero
*/
__u64 mbz64[3];
/**
* @engines: 2-d array of engine instances to configure parallel engine
*
* length = width (i) * num_siblings (j)
* index = j + i * num_siblings
*/
struct i915_engine_class_instance engines[0];
} __attribute__ ((packed));
struct prelim_i915_context_param_engines {
#define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT (PRELIM_I915_USER_EXT | 2) /* see prelim_i915_context_engines_parallel_submit */
#define PRELIM_I915_CONTEXT_ENGINES_EXT_PARALLEL2_SUBMIT (PRELIM_I915_USER_EXT | 3) /* see prelim_i915_context_engines_parallel2_submit */
};
/* PRELIM OA formats */
@@ -753,6 +488,7 @@ enum prelim_drm_i915_oa_format {
PRELIM_I915_OAC_FORMAT_A24u64_B8_C8,
PRELIM_I915_OA_FORMAT_A38u64_R2u64_B8_C8,
PRELIM_I915_OAM_FORMAT_A2u64_R2u64_B8_C8,
PRELIM_I915_OAC_FORMAT_A24u22_B8_C8,
PRELIM_I915_OA_FORMAT_MAX /* non-ABI */
};
@@ -1453,6 +1189,38 @@ struct prelim_drm_i915_gem_wait_user_fence {
__s64 timeout;
};
/*
* This extension allows user to attach a pair of <addr, value> to an execbuf.
* When that execbuf is finished by GPU HW, the value is written to addr.
* So after execbuf is submitted, user can poll addr to know whether execbuf
* has been finished or not. User space can also call i915_gem_wait_user_fence_ioctl
* (with PRELIM_I915_UFENCE_WAIT_EQ operation) to wait for finishing of execbuf.
* This ioctl can sleep so it is more efficient than a busy polling.
* So this serves as synchronization purpose. It is similar to DRM_IOCTL_I915_GEM_WAIT,
* which is prohibited by compute context. The method introduced here can be use for
* both compute and non-compute context.
*/
struct prelim_drm_i915_gem_execbuffer_ext_user_fence {
#define PRELIM_DRM_I915_GEM_EXECBUFFER_EXT_USER_FENCE (PRELIM_I915_USER_EXT | 1)
struct i915_user_extension base;
/**
* A virtual address mapped to current process's GPU address space.
* addr has to be qword aligned. address has to be a a valid gpu
* virtual address at the time of batch buffer completion.
*/
__u64 addr;
/**
* value to be written to above address after execbuf finishes.
*/
__u64 value;
/**
* for future extensions. Currently not used.
*/
__u64 rsvd;
};
struct prelim_drm_i915_vm_bind_ext_sync_fence {
#define PRELIM_I915_VM_BIND_EXT_SYNC_FENCE (PRELIM_I915_USER_EXT | 0)
struct i915_user_extension base;
@@ -1560,4 +1328,4 @@ struct prelim_drm_i915_gem_vm_param {
__u64 value;
};
#endif /* __I915_DRM_PRELIM_H__ */
#endif /* __I915_DRM_PRELIM_H__ */