/*
* Copyright © 2023 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Xu, Zhengguo <zhengguo.xu@intel.com>
*/
#ifdef HAVE_LIBGEN_H
#include <libgen.h>
#endif
#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <signal.h>
#include <getopt.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/utsname.h>
#include <termios.h>
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include <map>
#include <vector>
#include <queue>
#include <list>
#include <set>
#include <mutex>
#include <shared_mutex>
#include <algorithm>
#ifdef HAVE_VALGRIND
#include <valgrind/valgrind.h>
#include <valgrind/memcheck.h>
#define VG(x) x
#else
#define VG(x) do {} while (0)
#endif
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mos_bufmgr_api.h"
#include "mos_util_debug.h"
#include "intel_hwconfig_types.h"
#include "xf86drm.h"
#include "mos_vma.h"
#include "libdrm_lists.h"
#include "mos_bufmgr_xe.h"
#include "mos_synchronization_xe.h"
#include "mos_utilities.h"
#include "mos_bufmgr_util_debug.h"
#include "media_user_setting_value.h"
#include "linux_system_info.h"
#include "mos_oca_defs_specific.h"
//These two struct used by mos_bufmgr_priv.h
typedef struct MOS_OCA_EXEC_LIST_INFO mos_oca_exec_list_info;
//struct MEDIA_SYSTEM_INFO;
#include "mos_bufmgr_priv.h"
#define PAGE_SIZE_4K (1ull << 12)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
//mos_xe_mem_class currently used as index of default_alignment
enum mos_xe_mem_class
{
MOS_XE_MEM_CLASS_SYSMEM = 0, //For DRM_XE_MEM_REGION_CLASS_SYSMEM
MOS_XE_MEM_CLASS_VRAM, //For DRM_XE_MEM_REGION_CLASS_VRAM
MOS_XE_MEM_CLASS_MAX
};
struct mos_xe_context {
struct mos_linux_context ctx;
/**
* Always keep the latest available timeline index for
* the execution's fence out point.
*/
struct mos_xe_dep* timeline_dep;
/**
* The UMD's dummy exec_queue id for exec_queue ctx.
*/
uint32_t dummy_exec_queue_id;
/**
* Indicates the ctx width.
*/
uint8_t ctx_width;
/**
* Indicates the number of placements used when creating the exec_queue.
*/
uint8_t num_placements;
/**
* Indicates the engine class used to create the exec_queue.
*/
uint16_t engine_class;
/**
* Indicates the engine capabilities of the queried exec_queue.
*/
uint64_t engine_caps;
/**
* Indicates the creation flags; the current value should always be zero.
*/
uint32_t flags;
/**
* Indicates whether this is a protected ctx.
*/
bool is_protected;
/**
* Indicates the exec_queue reset count on this context.
* Note: this count depends on context restore; if the upper layer queries
* the reset status before context restore, this value may be incorrect.
*/
uint32_t reset_count;
};
typedef struct mos_xe_device {
/**
* Note: we agree that hw_config[0] points to the number of hw config in total
* And hw config data starts from hw_config[1]
*/
uint32_t *hw_config = nullptr;
struct drm_xe_query_config *config = nullptr;
struct drm_xe_query_engines *engines = nullptr;
struct drm_xe_query_mem_regions *mem_regions = nullptr;
struct drm_xe_query_gt_list *gt_list = nullptr;
/**
* Note: we agree here that uc_versions[0] for guc version and uc_versions[1] for huc version
*/
struct drm_xe_query_uc_fw_version uc_versions[UC_TYPE_MAX];
} mos_xe_device;
typedef struct mos_xe_bufmgr_gem {
struct mos_bufmgr bufmgr;
atomic_t ref_count;
int fd;
std::recursive_mutex m_lock;
drmMMListHead managers;
drmMMListHead named;
mos_vma_heap vma_heap[MEMZONE_COUNT];
bool object_capture_disabled; // Note: unused on xe; remove it in the future.
#define MEM_PROFILER_BUFFER_SIZE 256
char mem_profiler_buffer[MEM_PROFILER_BUFFER_SIZE];
char* mem_profiler_path;
int mem_profiler_fd;
uint32_t gt_id;
/**
* This RW lock is used to avoid concurrently reading and writing the same sync obj in KMD.
* Reading sync obj ioctl: exec and syncobj wait.
* Writing sync obj ioctl: reset sync obj, destroy sync obj and create sync obj.
*/
std::shared_timed_mutex sync_obj_rw_lock;
/**
* Save the pair of UMD dummy exec_queue id and ctx pointer.
*/
std::map<uint32_t, struct mos_xe_context*> global_ctx_info;
uint32_t vm_id;
/**
* Everything queried from KMD that describes hw information.
*/
struct mos_xe_device xe_device;
//Note: DON'T put these fields in xe_device
bool has_vram;
uint8_t va_bits;
/** bitmask of all memory regions */
uint64_t mem_regions_mask;
/** @default_alignment: safe alignment regardless of region location */
uint32_t default_alignment[MOS_XE_MEM_CLASS_MAX] = {PAGE_SIZE_4K, PAGE_SIZE_4K};
//End of Note
/**
* Indicates whether gpu-gpu and cpu-gpu synchronization is disabled.
* This is mainly for debug purposes; synchronization should always be enabled by default.
* It could be disabled by env INTEL_SYNCHRONIZATION_DISABLE.
*/
bool is_disable_synchronization;
/** indicates the exec_queue timeslice property */
#define EXEC_QUEUE_TIMESLICE_DEFAULT -1
#define EXEC_QUEUE_TIMESLICE_MAX 100000 //100ms
int32_t exec_queue_timeslice;
} mos_xe_bufmgr_gem;
typedef struct mos_xe_exec_bo {
/** indicates the real exec bo */
struct mos_linux_bo *bo;
/**
* Save read, write flags etc.
* Two flags defined here: EXEC_OBJECT_READ_XE and EXEC_OBJECT_WRITE_XE.
* Whether this bo needs exec sync depends on these flags.
*/
uint32_t flags;
} mos_xe_exec_bo;
typedef struct mos_xe_bo_gem {
/**
* Maximum size for bo name
*/
#define MAX_NAME_SIZE 128
struct mos_linux_bo bo;
/**
* Reference count
*/
atomic_t ref_count;
/**
* Map count, incremented when bo map is called
*/
atomic_t map_count;
//Note7: unify gem_handle and bo.handle by deleting this one; refine mos_linux_bo.handle to type uint32_t
/**
* Bo handle allocated from drm
* Note: combine with bo.handle to use the same one.
*/
uint32_t gem_handle;
/**
* Saves the bo name; this is for debug usage.
* Suggest giving a bo name when allocating a bo.
*/
char name[MAX_NAME_SIZE];
/**
*
* List contains prime fd'd objects
*/
drmMMListHead name_list;
/**
* Mapped address for the buffer, saved across map/unmap cycles
*/
void *mem_virtual;
/**
* Boolean of whether this buffer was allocated with userptr
*/
bool is_userptr;
/**
* Memory region used when creating the surfaces for local/system memory;
* this field only indicates the memory region type, not a memory region instance.
*/
int mem_region;
/**
* We should always get the syncobj handle from the bo handle with the 4 steps below each time:
* 1. get the prime_fd from bo.handle
* 2. get the syncfile fd from the prime_fd
* 3. get the syncobj_handle from the syncfile fd
* 4. close the prime_fd and syncfile fd.
*
* If the umd wants external processes to sync with it, the umd should always import its batch
* syncobj handle into each external bo's dma sync buffer.
*
* Boolean of whether this buffer is imported from external
*/
bool is_imported;
/**
* @cpu_caching: The CPU caching mode to select for this object. If
* mmaping the object the mode selected here will also be used.
*
* Supported values:
*
* DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back
* caching. On iGPU this can't be used for scanout surfaces. Currently
* not allowed for objects placed in VRAM.
*
* DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This
* is uncached. Scanout surfaces should likely use this. All objects
* that can be placed in VRAM must use this.
*/
uint16_t cpu_caching;
/**
* @pat_index: The platform defined @pat_index to use for this mapping.
* The index basically maps to some predefined memory attributes,
* including things like caching, coherency, compression etc. The exact
* meaning of the pat_index is platform specific. When the KMD sets up
* the binding the index here is encoded into the ppGTT PTE.
*
* For coherency the @pat_index needs to be at least 1way coherent when
* drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD
* will extract the coherency mode from the @pat_index and reject if
* there is a mismatch (see note below for pre-MTL platforms).
*
* Note: On pre-MTL platforms there is only a caching mode and no
* explicit coherency mode, but on such hardware there is always a
* shared-LLC (or is dgpu) so all GT memory accesses are coherent with
* CPU caches even with the caching mode set as uncached. It's only the
* display engine that is incoherent (on dgpu it must be in VRAM which
* is always mapped as WC on the CPU). However to keep the uapi somewhat
* consistent with newer platforms the KMD groups the different cache
* levels into the following coherency buckets on all pre-MTL platforms:
*
* ppGTT UC -> COH_NONE
* ppGTT WC -> COH_NONE
* ppGTT WT -> COH_NONE
* ppGTT WB -> COH_AT_LEAST_1WAY
*
* In practice UC/WC/WT should only ever be used for scanout surfaces on
* such platforms (or perhaps in general for dma-buf if shared with
* another device) since it is only the display engine that is actually
* incoherent. Everything else should typically use WB given that we
* have a shared-LLC. On MTL+ this completely changes and the HW
* defines the coherency mode as part of the @pat_index, where
* incoherent GT access is possible.
*
* Note: For userptr and externally imported dma-buf the kernel expects
* either 1WAY or 2WAY for the @pat_index.
*/
uint16_t pat_index;
/**
* Boolean of whether this buffer is exported to external
*/
bool is_exported;
/**
* A cmd bo has an exec bo list which saves all exec bos in it.
* The upper-layer caller should always update this list before exec submission and clear it after exec submission.
*/
std::map<uintptr_t, struct mos_xe_exec_bo> exec_list;
#define INVALID_EXEC_QUEUE_ID -1
/**
* Save last dummy write exec_queue id.
* Init this field to INVALID_EXEC_QUEUE_ID at the beginning.
*/
uint32_t last_exec_write_exec_queue;
/**
* Save last dummy read exec_queue id.
* Init this field to INVALID_EXEC_QUEUE_ID at the beginning.
*/
uint32_t last_exec_read_exec_queue;
/**
* Read dependencies, pairs of dummy EXEC_QUEUE_ID and mos_xe_bo_dep.
* This map saves the read deps of this bo on all exec_queues;
* exec checks the operation flags to get the dep from this map, adds it into the exec sync array and updates the map after exec.
* Refer to the exec call for more details.
*/
std::map<uint32_t, struct mos_xe_bo_dep> read_deps;
/**
* Write dependencies, pairs of dummy EXEC_QUEUE_ID and mos_xe_bo_dep.
* This map saves the write deps of this bo on all exec_queues;
* exec checks the operation flags to get the dep from this map, adds it into the exec sync array and updates the map after exec.
* Refer to the exec call for more details.
*/
std::map<uint32_t, struct mos_xe_bo_dep> write_deps;
} mos_xe_bo_gem;
struct mos_xe_external_bo_info {
/**
* syncobj handle created by umd to import external bo syncfile
*/
int syncobj_handle;
/**
* prime fd export from external bo handle
*/
int prime_fd;
};
#define MOS_UNIMPLEMENT(param) (void)(param)
static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
static void mos_bo_free_xe(struct mos_linux_bo *bo);
static int mos_query_engines_count_xe(struct mos_bufmgr *bufmgr, unsigned int *nengine);
int mos_query_engines_xe(struct mos_bufmgr *bufmgr,
__u16 engine_class,
__u64 caps,
unsigned int *nengine,
void *engine_map);
static void mos_gem_bo_wait_rendering_xe(struct mos_linux_bo *bo);
static struct mos_xe_bufmgr_gem *
mos_bufmgr_gem_find(int fd)
{
struct mos_xe_bufmgr_gem *bufmgr_gem;
DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
if (bufmgr_gem->fd == fd) {
atomic_inc(&bufmgr_gem->ref_count);
return bufmgr_gem;
}
}
return nullptr;
}
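/**
* Lazily query and cache one piece of device info: if xe_dev->info is still
* nullptr, call query_func(fd) and store the result; return retval when
* xe_dev or the queried info is nullptr.
*/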
#define MOS_DRM_CHK_XE_DEV(xe_dev, info, query_func, retval) \
MOS_DRM_CHK_NULL_RETURN_VALUE(xe_dev, retval); \
if (xe_dev->info == nullptr) \
{ \
xe_dev->info = query_func(fd); \
MOS_DRM_CHK_NULL_RETURN_VALUE(xe_dev->info, retval); \
}
static struct drm_xe_query_gt_list *
__mos_query_gt_list_xe(int fd)
{
int ret = 0;
struct drm_xe_query_gt_list *gt_list;
struct drm_xe_device_query query;
memclear(query);
query.query = DRM_XE_DEVICE_QUERY_GT_LIST;
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
&query);
if (ret || !query.size)
{
return nullptr;
}
gt_list = (drm_xe_query_gt_list *)calloc(1, query.size);
MOS_DRM_CHK_NULL_RETURN_VALUE(gt_list, nullptr);
query.data = (uintptr_t)(gt_list);
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
&query);
if (ret || !query.size || 0 == gt_list->num_gt)
{
MOS_XE_SAFE_FREE(gt_list);
return nullptr;
}
return gt_list;
}
static uint32_t __mos_query_mem_regions_instance_mask_xe(struct mos_bufmgr *bufmgr)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
int fd = bufmgr_gem->fd;
uint64_t __memory_regions = 0;
MOS_DRM_CHK_XE_DEV(dev, gt_list, __mos_query_gt_list_xe, 0)
struct drm_xe_query_gt_list *gt_list = dev->gt_list;
for (int i = 0; i < gt_list->num_gt; i++) {
/**
* Note: __memory_regions is the mem region instance mask on all tiles and gts
*/
__memory_regions |= gt_list->gt_list[i].near_mem_regions |
gt_list->gt_list[i].far_mem_regions;
}
bufmgr_gem->mem_regions_mask = __memory_regions;
return __memory_regions;
}
static struct drm_xe_query_mem_regions *
__mos_query_mem_regions_xe(int fd)
{
int ret = 0;
struct drm_xe_query_mem_regions *mem_regions;
struct drm_xe_device_query query;
memclear(query);
query.query = DRM_XE_DEVICE_QUERY_MEM_REGIONS;
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
&query);
if (ret || !query.size)
{
return nullptr;
}
mem_regions = (drm_xe_query_mem_regions *)calloc(1, query.size);
MOS_DRM_CHK_NULL_RETURN_VALUE(mem_regions, nullptr);
query.data = (uintptr_t)(mem_regions);
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
if (ret || !query.size || 0 == mem_regions->num_mem_regions)
{
MOS_XE_SAFE_FREE(mem_regions);
return nullptr;
}
return mem_regions;
}
uint8_t __mos_query_vram_region_count_xe(struct mos_xe_device *dev, int fd)
{
uint8_t vram_regions = 0;
MOS_DRM_CHK_XE_DEV(dev, mem_regions, __mos_query_mem_regions_xe, 0)
struct drm_xe_query_mem_regions *mem_regions = dev->mem_regions;
for (int i = 0; i < mem_regions->num_mem_regions; i++)
{
if (mem_regions->mem_regions[i].mem_class == DRM_XE_MEM_REGION_CLASS_VRAM)
{
vram_regions++;
}
}
return vram_regions;
}
int mos_force_gt_reset_xe(int fd, int gt_id)
{
char reset_string[128];
sprintf(reset_string, "cat /sys/kernel/debug/dri/0/gt%d/force_reset", gt_id);
return system(reset_string);
}
static struct drm_xe_query_config *
__mos_query_config_xe(int fd)
{
struct drm_xe_query_config *config;
struct drm_xe_device_query query;
int ret = 0;
memclear(query);
query.query = DRM_XE_DEVICE_QUERY_CONFIG;
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, (void *)&query);
if (ret || !query.size)
{
return nullptr;
}
config = (drm_xe_query_config *) malloc(query.size);
if (config != nullptr)
{
memset(config, 0, query.size);
}
else
{
MOS_DRM_ASSERTMESSAGE("malloc config failed");
return nullptr;
}
query.data = (uintptr_t)config;
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, (void *)&query);
if (ret || !query.size || 0 == config->num_params)
{
MOS_XE_SAFE_FREE(config);
return nullptr;
}
return config;
}
static int
__mos_get_default_alignment_xe(struct mos_bufmgr *bufmgr)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL)
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
int fd = bufmgr_gem->fd;
MOS_DRM_CHK_XE_DEV(dev, mem_regions, __mos_query_mem_regions_xe, -ENODEV)
struct drm_xe_query_mem_regions *mem_regions = dev->mem_regions;
uint16_t mem_class;
for (int i = 0; i < mem_regions->num_mem_regions; i++)
{
if (DRM_XE_MEM_REGION_CLASS_SYSMEM == mem_regions->mem_regions[i].mem_class)
{
mem_class = MOS_XE_MEM_CLASS_SYSMEM;
}
else if (DRM_XE_MEM_REGION_CLASS_VRAM == mem_regions->mem_regions[i].mem_class)
{
mem_class = MOS_XE_MEM_CLASS_VRAM;
}
else
{
MOS_DRM_ASSERTMESSAGE("Unsupported mem class");
return -EINVAL;
}
if (bufmgr_gem->default_alignment[mem_class] < mem_regions->mem_regions[i].min_page_size)
{
bufmgr_gem->default_alignment[mem_class] = mem_regions->mem_regions[i].min_page_size;
}
}
return 0;
}
/**
* Note: Need to add this func to bufmgr api later
*/
static int
mos_query_uc_version_xe(struct mos_bufmgr *bufmgr, struct mos_drm_uc_version *version)
{
int ret = 0;
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
int fd = bufmgr_gem->fd;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
if (bufmgr && version && version->uc_type < UC_TYPE_MAX)
{
/**
* Note: query the uc version from KMD if there is no historic data in bufmgr, otherwise use the historic data.
*/
if (dev->uc_versions[version->uc_type].uc_type != version->uc_type)
{
struct drm_xe_device_query query;
memclear(query);
query.size = sizeof(struct drm_xe_query_uc_fw_version);
query.query = DRM_XE_DEVICE_QUERY_UC_FW_VERSION;
memclear(dev->uc_versions[version->uc_type]);
dev->uc_versions[version->uc_type].uc_type = version->uc_type;
query.data = (uintptr_t)&dev->uc_versions[version->uc_type];
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_DEVICE_QUERY,
&query);
if (ret)
{
memclear(dev->uc_versions[version->uc_type]);
dev->uc_versions[version->uc_type].uc_type = UC_TYPE_INVALID;
MOS_DRM_ASSERTMESSAGE("Failed to query UC version, uc type: %d, errno: %d", version->uc_type, ret);
return ret;
}
}
version->major_version = dev->uc_versions[version->uc_type].major_ver;
version->minor_version = dev->uc_versions[version->uc_type].minor_ver;
}
return ret;
}
bool __mos_has_vram_xe(struct mos_bufmgr *bufmgr)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
int fd = bufmgr_gem->fd;
MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, 0)
struct drm_xe_query_config *config = dev->config;
bool has_vram = ((config->info[DRM_XE_QUERY_CONFIG_FLAGS] & DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM) > 0);
bufmgr_gem->has_vram = has_vram;
return has_vram;
}
uint8_t __mos_query_va_bits_xe(struct mos_bufmgr *bufmgr)
{
uint8_t va_bits = 48;
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, va_bits)
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
int fd = bufmgr_gem->fd;
bufmgr_gem->va_bits = va_bits;
MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, va_bits)
struct drm_xe_query_config *config = dev->config;
va_bits = config->info[DRM_XE_QUERY_CONFIG_VA_BITS] & 0xff;
bufmgr_gem->va_bits = va_bits;
return va_bits;
}
static uint64_t
mos_get_platform_information_xe(struct mos_bufmgr *bufmgr)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
return bufmgr->platform_information;
}
static void
mos_set_platform_information_xe(struct mos_bufmgr *bufmgr, uint64_t p)
{
if (bufmgr)
bufmgr->platform_information |= p;
}
static enum mos_memory_zone
__mos_bo_memzone_for_address_xe(uint64_t address)
{
if (address >= MEMZONE_PRIME_START)
return MEMZONE_PRIME;
else if (address >= MEMZONE_DEVICE_START)
return MEMZONE_DEVICE;
else
return MEMZONE_SYS;
}
static void
__mos_bo_vma_free_xe(struct mos_bufmgr *bufmgr,
uint64_t address,
uint64_t size)
{
CHK_CONDITION(nullptr == bufmgr, "nullptr bufmgr.\n", );
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
CHK_CONDITION(0ull == address, "invalid address.\n", );
enum mos_memory_zone memzone = __mos_bo_memzone_for_address_xe(address);
mos_vma_heap_free(&bufmgr_gem->vma_heap[memzone], address, size);
}
static void
__mos_bo_mark_mmaps_incoherent_xe(struct mos_linux_bo *bo)
{
#if HAVE_VALGRIND
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
if (bo_gem->mem_virtual)
VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
#endif
}
static inline void
mos_bo_reference_xe(struct mos_linux_bo *bo)
{
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
atomic_inc(&bo_gem->ref_count);
}
drm_export void mos_bo_unreference_xe(struct mos_linux_bo *bo)
{
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
if (atomic_read(&bo_gem->ref_count) <= 0)
return;
if (atomic_dec_and_test(&bo_gem->ref_count))
{
/* release memory associated with this object */
/* Clear any left-over mappings */
if (atomic_read(&bo_gem->map_count) > 0)
{
atomic_set(&bo_gem->map_count, 0);
__mos_bo_mark_mmaps_incoherent_xe(bo);
}
DRMLISTDEL(&bo_gem->name_list);
mos_bo_free_xe(bo);
}
}
static uint32_t
__mos_vm_create_xe(struct mos_bufmgr *bufmgr)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct drm_xe_vm_create vm;
int ret;
memclear(vm);
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_VM_CREATE, &vm);
if (ret != 0)
{
MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_VM_CREATE failed: %s",
strerror(errno));
return INVALID_VM;
}
return vm.vm_id;
}
static void
__mos_vm_destroy_xe(struct mos_bufmgr *bufmgr, uint32_t vm_id)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct drm_xe_vm_destroy vm_destroy;
int ret;
if (INVALID_VM == vm_id)
{
MOS_DRM_ASSERTMESSAGE("invalid vm_id");
return;
}
memclear(vm_destroy);
vm_destroy.vm_id = vm_id;
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_VM_DESTROY, &vm_destroy);
if (ret != 0)
{
MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_VM_DESTROY failed: %s",
strerror(errno));
}
}
static uint32_t
mos_vm_create_xe(struct mos_bufmgr *bufmgr)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
if (bufmgr_gem->vm_id != INVALID_VM)
{
return bufmgr_gem->vm_id;
}
else
{
return __mos_vm_create_xe(bufmgr);
}
}
static void
mos_vm_destroy_xe(struct mos_bufmgr *bufmgr, uint32_t vm_id)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
if (vm_id != bufmgr_gem->vm_id)
{
__mos_vm_destroy_xe(bufmgr, vm_id);
}
}
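/**
* Create a context backed by a real exec_queue.
* The exec_queue is created on the bufmgr's vm from the engine instances in
* engine_map with the given ctx_width and num_placements; a timeslice property
* may be set for render/compute as a WA (see below). The new context gets a
* UMD dummy_exec_queue_id and is registered in global_ctx_info.
*/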
static struct mos_linux_context *
mos_context_create_shared_xe(
struct mos_bufmgr *bufmgr,
mos_linux_context* ctx,
__u32 flags,
bool bContextProtected,
void *engine_map,
uint8_t ctx_width,
uint8_t num_placements,
uint32_t ctx_type)
{
MOS_UNUSED(ctx);
MOS_UNUSED(ctx_type);
MOS_UNUSED(bContextProtected);
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, nullptr)
MOS_DRM_CHK_NULL_RETURN_VALUE(engine_map, nullptr)
static uint32_t dummy_exec_queue_id = 0;
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct mos_xe_context *context = nullptr;
struct drm_xe_exec_queue_create create;
int ret;
uint16_t engine_class = ((struct drm_xe_engine_class_instance *)engine_map)[0].engine_class;
memclear(create);
create.width = ctx_width;
create.num_placements = num_placements;
create.vm_id = bufmgr_gem->vm_id;
create.flags = flags;
create.instances = (uintptr_t)engine_map;
/**
* Note: must use MOS_New to allocate the buffer instead of malloc since mos_xe_context
* contains std::vector and std::queue; otherwise they would not be constructed.
*/
context = MOS_New(mos_xe_context);
MOS_DRM_CHK_NULL_RETURN_VALUE(context, nullptr)
/**
* Set the exec_queue timeslice for render/compute only, as a WA to ensure the exec sequence.
* Note: this works around a potential KMD issue where an exec_queue can be preempted by many workloads of the same priority.
*/
if ((engine_class == DRM_XE_ENGINE_CLASS_RENDER
|| engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
&& (ctx_width * num_placements == 1)
&& bufmgr_gem->exec_queue_timeslice != EXEC_QUEUE_TIMESLICE_DEFAULT)
{
struct drm_xe_ext_set_property timeslice;
memclear(timeslice);
timeslice.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE;
/**
* Note: this value indicates the maximum time slice for a workload, not the real waiting time.
*/
timeslice.value = bufmgr_gem->exec_queue_timeslice;
timeslice.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY;
create.extensions = (uintptr_t)(&timeslice);
MOS_DRM_NORMALMESSAGE("WA: exec_queue timeslice set by engine class(%d), value(%d)",
engine_class, bufmgr_gem->exec_queue_timeslice);
}
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
MOS_DRM_CHK_STATUS_MESSAGE_RETURN_VALUE_WH_OP(ret, context, MOS_Delete, nullptr,
"ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_CREATE, return error(%d)", ret);
context->ctx.ctx_id = create.exec_queue_id;
context->ctx_width = ctx_width;
context->num_placements = num_placements;
context->engine_class = ((struct drm_xe_engine_class_instance *)engine_map)[0].engine_class;
context->is_protected = bContextProtected;
context->flags = flags;
context->ctx.bufmgr = bufmgr;
context->ctx.vm_id = bufmgr_gem->vm_id;
context->reset_count = 0;
context->timeline_dep = nullptr;
bufmgr_gem->m_lock.lock();
context->dummy_exec_queue_id = ++dummy_exec_queue_id;
bufmgr_gem->global_ctx_info[context->dummy_exec_queue_id] = context;
bufmgr_gem->m_lock.unlock();
return &context->ctx;
}
static struct mos_linux_context *
mos_context_create_xe(struct mos_bufmgr *bufmgr)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct mos_xe_context *context = nullptr;
/**
* Note: must use MOS_New to allocate the buffer instead of malloc since mos_xe_context
* contains std::queue; otherwise the queue would not be constructed.
*/
context = MOS_New(mos_xe_context);
MOS_DRM_CHK_NULL_RETURN_VALUE(context, nullptr)
context->ctx.ctx_id = INVALID_EXEC_QUEUE_ID;
context->ctx_width = 0;
context->ctx.bufmgr = bufmgr;
context->ctx.vm_id = bufmgr_gem->vm_id;
context->reset_count = 0;
context->timeline_dep = nullptr;
context->dummy_exec_queue_id = INVALID_EXEC_QUEUE_ID;
return &context->ctx;
}
static struct mos_linux_context *
mos_context_create_ext_xe(
struct mos_bufmgr *bufmgr,
__u32 flags,
bool bContextProtected)
{
MOS_UNUSED(flags);
MOS_UNUSED(bContextProtected);
return mos_context_create_xe(bufmgr);
}
static void
mos_context_destroy_xe(struct mos_linux_context *ctx)
{
if (nullptr == ctx)
{
return;
}
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)(ctx->bufmgr);
if (nullptr == bufmgr_gem)
{
return;
}
struct mos_xe_context *context = (struct mos_xe_context *)ctx;
struct drm_xe_exec_queue_destroy exec_queue_destroy;
int ret;
bufmgr_gem->m_lock.lock();
bufmgr_gem->sync_obj_rw_lock.lock();
mos_sync_destroy_timeline_dep(bufmgr_gem->fd, context->timeline_dep);
context->timeline_dep = nullptr;
bufmgr_gem->global_ctx_info.erase(context->dummy_exec_queue_id);
bufmgr_gem->sync_obj_rw_lock.unlock();
bufmgr_gem->m_lock.unlock();
if (INVALID_EXEC_QUEUE_ID == ctx->ctx_id)
{
MOS_Delete(context);
return;
}
memclear(exec_queue_destroy);
exec_queue_destroy.exec_queue_id = ctx->ctx_id;
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &exec_queue_destroy);
if (ret != 0)
MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_EXEC_QUEUE_DESTROY failed: %s", strerror(errno));
MOS_Delete(context);
}
/**
* Restore a banned exec_queue with a newly created one.
* Note: this call is only for banned context restore; if it is used
* for any other purpose, you MUST pay attention to context->reset_count here.
*/
static int
__mos_context_restore_xe(struct mos_bufmgr *bufmgr,
struct mos_linux_context *ctx)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
if (INVALID_EXEC_QUEUE_ID == ctx->ctx_id)
{
MOS_DRM_ASSERTMESSAGE("Unable to restore intel context, it is not supported");
return -EINVAL;
}
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct mos_xe_context *context = (struct mos_xe_context *)ctx;
int ret;
//query engine firstly
uint32_t nengine = 0;
ret = mos_query_engines_count_xe(bufmgr, &nengine);
MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
"query engine count of restore failed, return error(%d)", ret)
struct drm_xe_engine_class_instance engine_map[nengine];
ret = mos_query_engines_xe(bufmgr,
context->engine_class,
context->engine_caps,
&nengine,
(void*)engine_map);
MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
"query engine of restore failed, return error(%d)", ret)
//create new exec queue
struct drm_xe_exec_queue_create create;
memclear(create);
create.width = context->ctx_width;
create.num_placements = context->num_placements;
create.vm_id = context->ctx.vm_id;
create.flags = context->flags;
create.instances = (uintptr_t)engine_map;
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
"ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_CREATE of restore, return error(%d)", ret)
//destroy old exec_queue
struct drm_xe_exec_queue_destroy exec_queue_destroy;
memclear(exec_queue_destroy);
exec_queue_destroy.exec_queue_id = ctx->ctx_id;
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &exec_queue_destroy);
MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
"ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_DESTROY of restore, return error(%d)", ret)
//restore
context->ctx.ctx_id = create.exec_queue_id;
context->reset_count += 1;
return MOS_XE_SUCCESS;
}
/**
* Get the property of the ctx
*
* @ctx indicates the context to query
* @property indicates the property to query
* @value returns the queried value for the given property
*/
static int
__mos_get_context_property_xe(struct mos_bufmgr *bufmgr,
struct mos_linux_context *ctx,
uint32_t property,
uint64_t &value)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct drm_xe_exec_queue_get_property p;
memclear(p);
p.property = property;
p.exec_queue_id = ctx->ctx_id;
int ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY, &p);
value = p.value;
return ret;
}
/**
* Allocate a section of virtual memory for a buffer, assigning an address.
*/
static uint64_t
__mos_bo_vma_alloc_xe(struct mos_bufmgr *bufmgr,
enum mos_memory_zone memzone,
uint64_t size,
uint64_t alignment)
{
CHK_CONDITION(nullptr == bufmgr, "nullptr bufmgr.\n", 0);
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
/* Force alignment to be some number of pages */
alignment = ALIGN(alignment, PAGE_SIZE);
uint64_t addr = mos_vma_heap_alloc(&bufmgr_gem->vma_heap[memzone], size, alignment);
// currently only supports a 48-bit address range
CHK_CONDITION((addr >> 48ull) != 0, "invalid address, over 48bit range.\n", 0);
CHK_CONDITION((addr >> (MEMZONE_SYS == memzone ? 40ull : (MEMZONE_DEVICE == memzone ? 41ull:42ull))) != 0, "invalid address, over memory zone range.\n", 0);
CHK_CONDITION((addr % alignment) != 0, "invalid address, does not meet alignment requirement.\n", 0);
return addr;
}
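/**
* Assign a virtual address (bo->offset64) for the bo on first use.
* The va is taken from the vma heap of the bo's memzone: 2M alignment for
* MEMZONE_PRIME (imported objects), 64K alignment for MEMZONE_DEVICE and
* MEMZONE_SYS.
*/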
static int
__mos_bo_set_offset_xe(MOS_LINUX_BO *bo)
{
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, -EINVAL)
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
uint64_t offset = 0;
uint64_t alignment = 0;
if (0 == bo->offset64)
{
bufmgr_gem->m_lock.lock();
/* On platforms where lmem only supports 64K pages, kmd requires us
* to either align the va to 2M or separate the lmem objects and smem
* objects into different va zones to avoid mixing lmem and smem
* objects in the same page table. For an imported object, we don't know
* if it's in lmem or smem. So, we need to align the va to 2M.
*/
if (MEMZONE_PRIME == bo_gem->mem_region)
{
offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, PAGE_SIZE_2M);
}
else if (MEMZONE_DEVICE == bo_gem->mem_region)
{
alignment = MAX(bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM], PAGE_SIZE_64K);
offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, PAGE_SIZE_64K);
}
else if (MEMZONE_SYS == bo_gem->mem_region)
{
alignment = MAX(bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM], PAGE_SIZE_64K);
offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, PAGE_SIZE_64K);
}
else
{
MOS_DRM_ASSERTMESSAGE("Invalid mem_region:%d", bo_gem->mem_region);
}
bo->offset64 = offset;
bo->offset = offset;
bufmgr_gem->m_lock.unlock();
}
return 0;
}
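/**
* Issue a single DRM_IOCTL_XE_VM_BIND with one bind entry describing the
* (bo_handle, offset, addr, size, pat_index) mapping, the bind op and flags,
* and an optional sync array of num_syncs entries.
*/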
static int __mos_vm_bind_xe(int fd, uint32_t vm_id, uint32_t exec_queue_id, uint32_t bo_handle,
uint64_t offset, uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op, uint32_t flags,
struct drm_xe_sync *sync, uint32_t num_syncs, uint64_t ext)
{
int ret;
struct drm_xe_vm_bind bind;
memclear(bind);
bind.extensions = ext;
bind.vm_id = vm_id;
bind.exec_queue_id = exec_queue_id;
bind.num_binds = 1;
bind.bind.obj = bo_handle;
bind.bind.obj_offset = offset;
bind.bind.range = size;
bind.bind.pat_index = pat_index;
bind.bind.addr = addr;
bind.bind.op = op;
bind.bind.flags = flags;
bind.num_syncs = num_syncs;
bind.syncs = (uintptr_t)sync;
ret = drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("Failed to bind vm, vm_id:%d, exec_queue_id:%d, op:0x%x, flags:0x%x, bo_handle:%d, offset:%lx, addr:0x%lx, size:%ld, pat_index:%d, errno(%d)",
vm_id, exec_queue_id, op, flags, bo_handle, offset, addr, size, pat_index, -errno);
}
return ret;
}
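/**
* Synchronous vm bind: create a temporary syncobj with the signal flag,
* issue the bind with it, wait until the syncobj signals (bind completed),
* then destroy the syncobj.
*/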
static int mos_vm_bind_sync_xe(int fd, uint32_t vm_id, uint32_t bo, uint64_t offset,
uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op)
{
struct drm_xe_sync sync;
memclear(sync);
sync.flags = DRM_XE_SYNC_FLAG_SIGNAL;
sync.type = DRM_XE_SYNC_TYPE_SYNCOBJ;
sync.handle = mos_sync_syncobj_create(fd, 0);
int ret = __mos_vm_bind_xe(fd, vm_id, 0, bo, offset, addr, size, pat_index,
op, 0, &sync, 1, 0);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("ret:%d, error:%d", ret, -errno);
mos_sync_syncobj_destroy(fd, sync.handle);
return ret;
}
ret = mos_sync_syncobj_wait_err(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("syncobj_wait error:%d", -errno);
}
mos_sync_syncobj_destroy(fd, sync.handle);
return ret;
}
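/**
* Asynchronous vm bind: the caller supplies its own sync array, which signals
* when the bind completes, and is responsible for any waiting.
*/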
static int mos_vm_bind_async_xe(int fd, uint32_t vm_id, uint32_t bo, uint64_t offset,
uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op,
struct drm_xe_sync *sync, uint32_t num_syncs)
{
return __mos_vm_bind_xe(fd, vm_id, 0, bo, offset, addr, size, pat_index,
op, 0, sync, num_syncs, 0);
}
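/**
* Allocate a gem bo.
* Placement is chosen from alloc->ext.mem_type: VRAM regions when the device
* has vram and video/device memory is requested, otherwise sysmem. The bo is
* created with WB or WC cpu caching based on alloc->ext.cpu_cacheable, gets a
* va assigned via __mos_bo_set_offset_xe, and is then bound synchronously with
* DRM_XE_VM_BIND_OP_MAP.
*/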
drm_export struct mos_linux_bo *
mos_bo_alloc_xe(struct mos_bufmgr *bufmgr,
struct mos_drm_bo_alloc *alloc)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
struct mos_xe_bo_gem *bo_gem;
struct drm_xe_gem_create create;
uint32_t bo_align = alloc->alignment;
int ret;
/**
* Note: must use MOS_New to allocate the buffer instead of malloc since mos_xe_bo_gem
* contains std::vector and std::map; otherwise they would not be constructed.
*/
bo_gem = MOS_New(mos_xe_bo_gem);
MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, nullptr)
memclear(bo_gem->bo);
bo_gem->is_exported = false;
bo_gem->is_imported = false;
bo_gem->is_userptr = false;
bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
atomic_set(&bo_gem->map_count, 0);
bo_gem->mem_virtual = nullptr;
bo_gem->mem_region = MEMZONE_SYS;
bo_align = MAX(alloc->alignment, bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM]);
if (bufmgr_gem->has_vram &&
(MOS_MEMPOOL_VIDEOMEMORY == alloc->ext.mem_type || MOS_MEMPOOL_DEVICEMEMORY == alloc->ext.mem_type))
{
bo_gem->mem_region = MEMZONE_DEVICE;
bo_align = MAX(alloc->alignment, bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM]);
alloc->ext.cpu_cacheable = false;
}
memclear(create);
if (MEMZONE_DEVICE == bo_gem->mem_region)
{
//Note: memory_region is related to gt_id on multi-tile GPUs; take gt_id into consideration in the multi-tile case
create.placement = bufmgr_gem->mem_regions_mask & (~0x1);
}
else
{
create.placement = bufmgr_gem->mem_regions_mask & 0x1;
}
//Note: we suggest vm_id=0 here as default, otherwise this bo cannot be exported as a prime fd.
create.vm_id = 0;
create.size = ALIGN(alloc->size, bo_align);
/**
* Note: currently only WB/WC are supported; UC and other cache modes are not allowed.
*/
create.cpu_caching = alloc->ext.cpu_cacheable ? DRM_XE_GEM_CPU_CACHING_WB : DRM_XE_GEM_CPU_CACHING_WC;
if ((strcmp(alloc->name, "MEDIA") == 0 || strcmp(alloc->name, "Media") == 0)
&& create.cpu_caching == DRM_XE_GEM_CPU_CACHING_WC)
create.flags |= DRM_XE_GEM_CREATE_FLAG_SCANOUT;
ret = drmIoctl(bufmgr_gem->fd,
DRM_IOCTL_XE_GEM_CREATE,
&create);
MOS_DRM_CHK_STATUS_MESSAGE_RETURN_VALUE_WH_OP(ret, bo_gem, MOS_Delete, nullptr,
"ioctl failed in DRM_IOCTL_XE_GEM_CREATE, return error(%d)", ret);
bo_gem->gem_handle = create.handle;
bo_gem->bo.handle = bo_gem->gem_handle;
bo_gem->bo.size = create.size;
bo_gem->bo.vm_id = INVALID_VM;
bo_gem->bo.bufmgr = bufmgr;
bo_gem->bo.align = bo_align;
bo_gem->cpu_caching = create.cpu_caching;
/**
* Note: better to use a default pat_index to override an invalid argument. Normally this should not happen.
*/
bo_gem->pat_index = alloc->ext.pat_index == PAT_INDEX_INVALID ? 0 : alloc->ext.pat_index;
if (bufmgr_gem->mem_profiler_fd != -1)
{
snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CREATE, %d, %d, %lu, %d, %s\n",
getpid(), bo_gem->bo.handle, bo_gem->bo.size,bo_gem->mem_region, alloc->name);
ret = write(bufmgr_gem->mem_profiler_fd,
bufmgr_gem->mem_profiler_buffer,
strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
if (-1 == ret)
{
MOS_DRM_ASSERTMESSAGE("Failed to write to %s: %s",
bufmgr_gem->mem_profiler_path, strerror(errno));
}
}
/* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
list (vma_list), so better set the list head here */
DRMINITLISTHEAD(&bo_gem->name_list);
memcpy(bo_gem->name, alloc->name, (strlen(alloc->name) + 1) > MAX_NAME_SIZE ? MAX_NAME_SIZE : (strlen(alloc->name) + 1));
atomic_set(&bo_gem->ref_count, 1);
MOS_DRM_NORMALMESSAGE("buf %d (%s) %ldb, bo:0x%lx",
bo_gem->gem_handle, alloc->name, alloc->size, (uint64_t)&bo_gem->bo);
__mos_bo_set_offset_xe(&bo_gem->bo);
ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
bufmgr_gem->vm_id,
bo_gem->gem_handle,
0,
bo_gem->bo.offset64,
bo_gem->bo.size,
bo_gem->pat_index,
DRM_XE_VM_BIND_OP_MAP);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("mos_vm_bind_sync_xe ret: %d", ret);
mos_bo_free_xe(&bo_gem->bo);
return nullptr;
}
else
{
bo_gem->bo.vm_id = bufmgr_gem->vm_id;
}
return &bo_gem->bo;
}
static unsigned long
__mos_bo_tile_size_xe(struct mos_xe_bufmgr_gem *bufmgr_gem, unsigned long size,
uint32_t *tiling_mode, uint32_t alignment)
{
unsigned long min_size, max_size;
unsigned long i;
if (TILING_NONE == *tiling_mode)
return size;
/* 965+ just need multiples of page size for tiling */
return ROUND_UP_TO(size, alignment);
}
/*
* Round a given pitch up to the minimum required for X tiling on a
* given chip. We use 512 as the minimum to allow for a later tiling
* change.
*/
static unsigned long
__mos_bo_tile_pitch_xe(struct mos_xe_bufmgr_gem *bufmgr_gem,
unsigned long pitch, uint32_t *tiling_mode)
{
unsigned long tile_width;
unsigned long i;
/* If untiled, then just align it so that we can do rendering
* to it with the 3D engine.
*/
if (TILING_NONE == *tiling_mode)
return ALIGN(pitch, 64);
if (TILING_X == *tiling_mode)
tile_width = 512;
else
tile_width = 128;
/* 965 is flexible */
return ROUND_UP_TO(pitch, tile_width);
}
static struct mos_linux_bo *
mos_bo_alloc_tiled_xe(struct mos_bufmgr *bufmgr,
struct mos_drm_bo_alloc_tiled *alloc_tiled)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
unsigned long size, stride;
uint32_t tiling;
uint32_t alignment = bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM];
if (bufmgr_gem->has_vram &&
(MOS_MEMPOOL_VIDEOMEMORY == alloc_tiled->ext.mem_type || MOS_MEMPOOL_DEVICEMEMORY == alloc_tiled->ext.mem_type))
{
alignment = bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM];
}
do {
unsigned long aligned_y, height_alignment;
tiling = alloc_tiled->ext.tiling_mode;
/* If we're tiled, our allocations are in 8 or 32-row blocks,
* so failure to align our height means that we won't allocate
* enough pages.
*
* If we're untiled, we still have to align to 2 rows high
* because the data port accesses 2x2 blocks even if the
* bottom row isn't to be rendered, so failure to align means
* we could walk off the end of the GTT and fault. This is
* documented on 965, and may be the case on older chipsets
* too so we try to be careful.
*/
aligned_y = alloc_tiled->y;
height_alignment = 2;
if (TILING_X == tiling)
height_alignment = 8;
else if (TILING_Y == tiling)
height_alignment = 32;
aligned_y = ALIGN(alloc_tiled->y, height_alignment);
stride = alloc_tiled->x * alloc_tiled->cpp;
stride = __mos_bo_tile_pitch_xe(bufmgr_gem, stride, &alloc_tiled->ext.tiling_mode);
size = stride * aligned_y;
size = __mos_bo_tile_size_xe(bufmgr_gem, size, &alloc_tiled->ext.tiling_mode, alignment);
} while (alloc_tiled->ext.tiling_mode != tiling);
alloc_tiled->pitch = stride;
struct mos_drm_bo_alloc alloc;
alloc.name = alloc_tiled->name;
alloc.size = size;
alloc.alignment = alignment;
alloc.ext = alloc_tiled->ext;
return mos_bo_alloc_xe(bufmgr, &alloc);
}
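/**
* Wrap a user-provided CPU address range as a bo.
* No gem handle is created; after a va is assigned, the address range is bound
* into the vm with DRM_XE_VM_BIND_OP_MAP_USERPTR.
*/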
drm_export struct mos_linux_bo *
mos_bo_alloc_userptr_xe(struct mos_bufmgr *bufmgr,
struct mos_drm_bo_alloc_userptr *alloc_uptr)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
struct mos_xe_bo_gem *bo_gem;
int ret;
/**
* Note: must use MOS_New to allocate the buffer instead of malloc since mos_xe_bo_gem
* contains std::vector and std::map; otherwise they would not be constructed.
*/
bo_gem = MOS_New(mos_xe_bo_gem);
MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, nullptr)
memclear(bo_gem->bo);
bo_gem->is_exported = false;
bo_gem->is_imported = false;
bo_gem->is_userptr = true;
bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
atomic_set(&bo_gem->map_count, 0);
bo_gem->mem_virtual = alloc_uptr->addr;
bo_gem->gem_handle = INVALID_HANDLE;
bo_gem->bo.handle = INVALID_HANDLE;
bo_gem->bo.size = alloc_uptr->size;
bo_gem->pat_index = alloc_uptr->pat_index == PAT_INDEX_INVALID ? 0 : alloc_uptr->pat_index;
bo_gem->bo.bufmgr = bufmgr;
bo_gem->bo.vm_id = INVALID_VM;
bo_gem->mem_region = MEMZONE_SYS;
/* Save the address provided by user */
#ifdef __cplusplus
bo_gem->bo.virt = alloc_uptr->addr;
#else
bo_gem->bo.virtual = alloc_uptr->addr;
#endif
/* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
list (vma_list), so better set the list head here */
DRMINITLISTHEAD(&bo_gem->name_list);
memcpy(bo_gem->name, alloc_uptr->name, (strlen(alloc_uptr->name) + 1) > MAX_NAME_SIZE ? MAX_NAME_SIZE : (strlen(alloc_uptr->name) + 1));
atomic_set(&bo_gem->ref_count, 1);
__mos_bo_set_offset_xe(&bo_gem->bo);
ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
bufmgr_gem->vm_id,
0,
(uint64_t)alloc_uptr->addr,
bo_gem->bo.offset64,
bo_gem->bo.size,
bo_gem->pat_index,
DRM_XE_VM_BIND_OP_MAP_USERPTR);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("mos_xe_vm_bind_userptr_sync ret: %d", ret);
mos_bo_free_xe(&bo_gem->bo);
return nullptr;
}
else
{
bo_gem->bo.vm_id = bufmgr_gem->vm_id;
}
MOS_DRM_NORMALMESSAGE("mos_bo_alloc_userptr_xe: buf (%s) %ldb, bo:0x%lx",
alloc_uptr->name, alloc_uptr->size, (uint64_t)&bo_gem->bo);
return &bo_gem->bo;
}
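/**
* Import a bo from a prime fd.
* The fd is converted to a gem handle; if a bo for that handle already exists
* on the named list it is re-referenced, otherwise a new bo is created in
* MEMZONE_PRIME, sized via lseek on the fd when possible, and bound with
* DRM_XE_VM_BIND_OP_MAP.
*/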
static struct mos_linux_bo *
mos_bo_create_from_prime_xe(struct mos_bufmgr *bufmgr, struct mos_drm_bo_alloc_prime *alloc_prime)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
int ret;
uint32_t handle;
struct mos_xe_bo_gem *bo_gem;
int prime_fd = alloc_prime->prime_fd;
int size = alloc_prime->size;
uint16_t pat_index = alloc_prime->pat_index;
drmMMListHead *list;
bufmgr_gem->m_lock.lock();
ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("create_from_prime: failed to obtain handle from fd: %s", strerror(errno));
bufmgr_gem->m_lock.unlock();
return nullptr;
}
/*
* See if the kernel has already returned this buffer to us. Just as
* for named buffers, we must not create two bo's pointing at the same
* kernel object
*/
for (list = bufmgr_gem->named.next; list != &bufmgr_gem->named; list = list->next)
{
bo_gem = DRMLISTENTRY(struct mos_xe_bo_gem, list, name_list);
if (bo_gem->gem_handle == handle)
{
mos_bo_reference_xe(&bo_gem->bo);
bufmgr_gem->m_lock.unlock();
return &bo_gem->bo;
}
}
bo_gem = MOS_New(mos_xe_bo_gem);
if (!bo_gem)
{
bufmgr_gem->m_lock.unlock();
return nullptr;
}
memclear(bo_gem->bo);
bo_gem->is_exported = false;
bo_gem->is_imported = true;
bo_gem->is_userptr = false;
bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
atomic_set(&bo_gem->map_count, 0);
bo_gem->mem_virtual = nullptr;
/* Determine size of bo. The fd-to-handle ioctl really should
* return the size, but it doesn't. If we have kernel 3.12 or
* later, we can lseek on the prime fd to get the size. Older
* kernels will just fail, in which case we fall back to the
* provided (estimated or guessed) size. */
ret = lseek(prime_fd, 0, SEEK_END);
if (ret != -1)
bo_gem->bo.size = ret;
else
bo_gem->bo.size = size;
bo_gem->bo.handle = handle;
/*
* Note: need to get the pat_index from the customer gmm info with at least 1-way coherency.
*/
bo_gem->pat_index = pat_index == PAT_INDEX_INVALID ? 0 : pat_index;
bo_gem->bo.bufmgr = bufmgr;
bo_gem->gem_handle = handle;
atomic_set(&bo_gem->ref_count, 1);
/**
* change bo_gem->name to const char*
*/
memcpy(bo_gem->name, alloc_prime->name, sizeof("prime"));
bo_gem->mem_region = MEMZONE_PRIME;
DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
bufmgr_gem->m_lock.unlock();
__mos_bo_set_offset_xe(&bo_gem->bo);
ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
bufmgr_gem->vm_id,
bo_gem->gem_handle,
0,
bo_gem->bo.offset64,
bo_gem->bo.size,
bo_gem->pat_index,
DRM_XE_VM_BIND_OP_MAP);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("mos_vm_bind_sync_xe ret: %d", ret);
mos_bo_free_xe(&bo_gem->bo);
return nullptr;
}
else
{
bo_gem->bo.vm_id = bufmgr_gem->vm_id;
}
return &bo_gem->bo;
}
static int
mos_bo_export_to_prime_xe(struct mos_linux_bo *bo, int *prime_fd)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
bufmgr_gem->m_lock.lock();
if (DRMLISTEMPTY(&bo_gem->name_list))
DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
bufmgr_gem->m_lock.unlock();
mos_gem_bo_wait_rendering_xe(bo);
if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
DRM_CLOEXEC, prime_fd) != 0)
return -errno;
bo_gem->is_exported = true;
return 0;
}
/**
* Update exec list for submission.
*
* @cmd_bo indicates the cmd bo for the exec submission.
* @exec_bo indicates the gpu resource for the exec submission.
* @write_flag indicates whether the exec bo is written by the GPU.
*/
static int
mos_gem_bo_update_exec_list_xe(struct mos_linux_bo *cmd_bo, struct mos_linux_bo *exec_bo, bool write_flag)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(cmd_bo, -EINVAL)
MOS_DRM_CHK_NULL_RETURN_VALUE(exec_bo, -EINVAL)
struct mos_xe_bo_gem *cmd_bo_gem = (struct mos_xe_bo_gem *) cmd_bo;
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) cmd_bo->bufmgr;
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
std::map<uintptr_t, struct mos_xe_exec_bo> &exec_list = cmd_bo_gem->exec_list;
if (exec_bo->handle == cmd_bo->handle)
{
MOS_DRM_NORMALMESSAGE("cmd bo should not add into exec list, skip it");
return MOS_XE_SUCCESS;
}
uintptr_t key = (uintptr_t)exec_bo;
if (exec_list.count(key) > 0)
{
/**
* This exec bo has been added before, but its exec flags need to be updated.
*/
// For all BOs with read and write usages, we could just assign write flag to reduce read deps size.
if (write_flag || (exec_list[key].flags & EXEC_OBJECT_WRITE_XE))
{
exec_list[key].flags = EXEC_OBJECT_WRITE_XE;
}
else
{
// For BOs only with read usage, we should assign read flag.
exec_list[key].flags |= EXEC_OBJECT_READ_XE;
}
}
else
{
struct mos_xe_exec_bo target;
target.bo = exec_bo;
target.flags = write_flag ? EXEC_OBJECT_WRITE_XE : EXEC_OBJECT_READ_XE;
exec_list[key] = target;
mos_bo_reference_xe(exec_bo);
}
return MOS_XE_SUCCESS;
}
/**
* Clear the exec bo from the list after submission.
*
* @cmd_bo indicates the cmd bo for the exec submission.
* @start is unused.
*/
static void
mos_gem_bo_clear_exec_list_xe(struct mos_linux_bo *cmd_bo, int start)
{
MOS_UNUSED(start);
if (cmd_bo != nullptr && cmd_bo->bufmgr != nullptr)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) cmd_bo->bufmgr;
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) cmd_bo;
std::map<uintptr_t, struct mos_xe_exec_bo> &exec_list = bo_gem->exec_list;
for (auto &it : exec_list) {
mos_bo_unreference_xe(it.second.bo);
}
exec_list.clear();
}
}
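/**
* Illustrative usage of the two helpers above, following the exec_list contract
* documented on mos_xe_bo_gem (bo names here are hypothetical):
*
* mos_gem_bo_update_exec_list_xe(batch_bo, surface_bo, true); // written by GPU
* mos_gem_bo_update_exec_list_xe(batch_bo, ref_bo, false); // only read by GPU
* //... submit batch_bo ...
* mos_gem_bo_clear_exec_list_xe(batch_bo, 0); // drop references after submission
*/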
/**
* This dumps all pending execution timelines on the given bo.
*/
int
__mos_dump_bo_wait_rendering_timeline_xe(uint32_t bo_handle,
uint32_t *handles,
uint64_t *points,
uint32_t count,
int64_t timeout_nsec,
uint32_t wait_flags,
uint32_t rw_flags)
{
#if (_DEBUG || _RELEASE_INTERNAL)
if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
{
MOS_DRM_CHK_NULL_RETURN_VALUE(handles, -EINVAL)
char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
int offset = 0;
offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
MOS_MAX_MSG_BUF_SIZE - offset,
"\n\t\t\tdump bo wait rendering: bo handle = %d, timeout_nsec = %ld, wait_flags = %d, rw_flags = %d",
bo_handle,
timeout_nsec,
wait_flags,
rw_flags);
for (int i = 0; i < count; i++)
{
offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
MOS_MAX_MSG_BUF_SIZE - offset,
"\n\t\t\t-syncobj handle = %d, timeline = %ld",
handles[i],
points[i]);
}
offset > MOS_MAX_MSG_BUF_SIZE ?
MOS_DRM_NORMALMESSAGE("imcomplete dump since log msg buffer overwrite %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
}
#endif
return MOS_XE_SUCCESS;
}
/**
* @bo indicates the bo object to wait on
* @timeout_nsec indicates the timeout in nanoseconds:
* if timeout_nsec > 0, wait for the given time; on timeout, return -ETIME;
* if timeout_nsec == 0, check the bo busy state; if busy, return -ETIME immediately;
* @wait_flags indicates the wait operation; it supports wait all, wait submit, wait available or wait any;
* refer to drm syncobj in drm.h for more details
* @rw_flags indicates the read/write operation:
* if rw_flags & EXEC_OBJECT_WRITE_XE, it means bo write; otherwise it means bo read.
* @first_signaled returns the first signaled syncobj handle in the handles array.
*/
static int
__mos_gem_bo_wait_timeline_rendering_with_flags_xe(struct mos_linux_bo *bo,
int64_t timeout_nsec,
uint32_t wait_flags,
uint32_t rw_flags,
uint32_t *first_signaled)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
int ret = MOS_XE_SUCCESS;
uint32_t count = 0;
mos_xe_bo_gem *bo_gem = (mos_xe_bo_gem *)bo;
std::map<uint32_t, uint64_t> timeline_data; //pair(syncobj, point)
std::vector<uint32_t> handles;
std::vector<uint64_t> points;
std::set<uint32_t> exec_queue_ids;
bufmgr_gem->m_lock.lock();
bufmgr_gem->sync_obj_rw_lock.lock_shared();
MOS_XE_GET_KEYS_FROM_MAP(bufmgr_gem->global_ctx_info, exec_queue_ids);
mos_sync_get_bo_wait_timeline_deps(exec_queue_ids,
bo_gem->read_deps,
bo_gem->write_deps,
timeline_data,
bo_gem->last_exec_write_exec_queue,
rw_flags);
bufmgr_gem->m_lock.unlock();
for (auto it : timeline_data)
{
handles.push_back(it.first);
points.push_back(it.second);
}
count = handles.size();
if (count > 0)
{
ret = mos_sync_syncobj_timeline_wait(bufmgr_gem->fd,
handles.data(),
points.data(),
count,
timeout_nsec,
wait_flags,
first_signaled);
__mos_dump_bo_wait_rendering_timeline_xe(bo_gem->gem_handle,
handles.data(),
points.data(),
count,
timeout_nsec,
wait_flags,
rw_flags);
}
bufmgr_gem->sync_obj_rw_lock.unlock_shared();
return ret;
}
/**
* Check if the bo is still in a busy state.
*
* Check if the read deps on all exec_queues and the write dep on the last write exec_queue are signaled.
* If any dep is not signaled, the bo is busy and -ETIME is returned immediately.
* Otherwise, move all deps on this bo from the busy queue to the free queue for reuse.
*/
static int
mos_gem_bo_busy_xe(struct mos_linux_bo *bo)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL);
mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
int64_t timeout_nsec = 0;
uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
uint32_t rw_flags = EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE;
int ret = __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
if (ret)
{
//busy
if (errno != ETIME)
{
MOS_DRM_ASSERTMESSAGE("bo_busy_xe ret:%d, error:%d", ret, -errno);
}
return true;
}
else if (MOS_XE_SUCCESS == ret)
{
//free
return false;
}
return false;
}
/**
* Waits for all GPU rendering with the object to have completed.
*
* Wait until the read deps on all exec_queues and the write dep on the last write exec_queue are signaled.
* Then move all deps on this bo from the busy queue to the free queue for reuse after rendering has completed.
*/
static void
mos_gem_bo_wait_rendering_xe(struct mos_linux_bo *bo)
{
if (bo == nullptr || bo->bufmgr == nullptr)
{
MOS_DRM_ASSERTMESSAGE("ptr is null pointer");
return;
}
mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
int64_t timeout_nsec = INT64_MAX;
uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
uint32_t rw_flags = EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE;
int ret = __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("bo_wait_rendering_xe ret:%d, error:%d", ret, -errno);
}
}
/**
* @timeout_ns indicates the timeout for waiting, but it is not a real timeout;
* it only selects between waiting for bo rendering to complete and checking the bo busy state.
* if timeout_ns != 0, wait for bo rendering to complete.
* if timeout_ns == 0, check the bo busy state.
*/
static int
mos_gem_bo_wait_xe(struct mos_linux_bo *bo, int64_t timeout_ns)
{
if (timeout_ns)
{
mos_gem_bo_wait_rendering_xe(bo);
return 0;
}
else
{
return mos_gem_bo_busy_xe(bo) ? -ETIME : 0;
}
return 0;
}
/**
* Map gpu resource for CPU read or write.
*
* 1. if mapped for write, it should wait until the read deps on all exec_queues and the write dep on the last write exec_queue are signaled.
* 2. if mapped for read, it should only wait until the write dep on the last write exec_queue is signaled.
*
* After bo rendering has completed on the GPU, the CPU can continue its read or write operation.
*/
static int
mos_bo_map_xe(struct mos_linux_bo *bo, int write_enable)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
int ret;
int64_t timeout_nsec = INT64_MAX;
uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
uint32_t rw_flags = write_enable ? EXEC_OBJECT_WRITE_XE : EXEC_OBJECT_READ_XE;
ret = __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("bo wait rendering error(%d ns)", -errno);
}
if (bo_gem->is_userptr)
{
/* Return the same user ptr */
return 0;
}
bufmgr_gem->m_lock.lock();
if (nullptr == bo_gem->mem_virtual)
{
struct drm_xe_gem_mmap_offset mmo;
memclear(mmo);
mmo.handle = bo->handle;
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
if (ret)
{
bufmgr_gem->m_lock.unlock();
return ret;
}
bo_gem->mem_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
MAP_SHARED, bufmgr_gem->fd, mmo.offset);
if (MAP_FAILED == bo_gem->mem_virtual)
{
bo_gem->mem_virtual = nullptr;
ret = -errno;
MOS_DRM_ASSERTMESSAGE("Error mapping buffer %d (%s): %s .",
bo_gem->gem_handle, bo_gem->name,
strerror(errno));
}
}
#ifdef __cplusplus
bo->virt = bo_gem->mem_virtual;
#else
bo->virtual = bo_gem->mem_virtual;
#endif
atomic_inc(&bo_gem->map_count);
__mos_bo_mark_mmaps_incoherent_xe(bo);
VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
bufmgr_gem->m_lock.unlock();
return 0;
}
static int
mos_bo_map_wc_xe(struct mos_linux_bo *bo)
{
return mos_bo_map_xe(bo, false);
}
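/**
* Unmap the bo for CPU access.
* Only the map count and the bo's virtual pointer are updated here; the mmapped
* address is kept in mem_virtual across map/unmap cycles.
*/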
static int mos_bo_unmap_xe(struct mos_linux_bo *bo)
{
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, 0)
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, 0)
if (bo_gem->is_userptr)
return 0;
bufmgr_gem->m_lock.lock();
if (atomic_dec_and_test(&bo_gem->map_count))
{
__mos_bo_mark_mmaps_incoherent_xe(bo);
#ifdef __cplusplus
bo->virt = nullptr;
#else
bo->virtual = nullptr;
#endif
}
bufmgr_gem->m_lock.unlock();
return 0;
}
static int
mos_bo_unmap_wc_xe(struct mos_linux_bo *bo)
{
return mos_bo_unmap_xe(bo);
}
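/**
 * A minimal usage sketch (illustrative only, excluded from the build): map a bo for CPU
 * write, fill it, then unmap. The example function name and the source of @data/@size are
 * assumptions; mos_bo_map_xe itself performs the GPU wait described above before returning.
 */
#if 0
static int example_cpu_upload(struct mos_linux_bo *bo, const void *data, size_t size)
{
    int ret = mos_bo_map_xe(bo, 1 /* write_enable */);
    if (ret)
    {
        return ret;
    }
#ifdef __cplusplus
    memcpy(bo->virt, data, size);
#else
    memcpy(bo->virtual, data, size);
#endif
    //Drop the CPU mapping reference; the last unmap clears bo->virt.
    return mos_bo_unmap_xe(bo);
}
#endif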
/**
 *This dumps the sync info for the current execution.
 *@syncs contains the fence-in points from bos that the current execution
 *depends on; @dep holds the fence-out point of the current execution.
*/
int __mos_dump_syncs_array_xe(struct drm_xe_sync *syncs,
uint32_t count,
mos_xe_dep *dep)
{
#if (_DEBUG || _RELEASE_INTERNAL)
if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
{
MOS_DRM_CHK_NULL_RETURN_VALUE(syncs, -EINVAL)
MOS_DRM_CHK_NULL_RETURN_VALUE(dep, -EINVAL)
char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
int offset = 0;
offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
MOS_MAX_MSG_BUF_SIZE - offset,
"\n\t\t\tdump fence out syncobj: handle = %d, timeline = %ld",
                        dep->syncobj_handle, dep->timeline_index);
if (count > 0)
{
offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
MOS_MAX_MSG_BUF_SIZE - offset,
"\n\t\t\tdump exec syncs array, num sync = %d",
count);
}
for (int i = 0; i < count; i++)
{
/**
             * Note: we assume all syncs here are timeline syncs; update this when any
             * other sync type is in use.
*/
offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
MOS_MAX_MSG_BUF_SIZE - offset,
"\n\t\t\t-syncobj_handle = %d, timeline = %ld, sync type = %d, sync flags = %d",
syncs[i].handle, syncs[i].timeline_value, syncs[i].type, syncs[i].flags);
}
offset > MOS_MAX_MSG_BUF_SIZE ?
MOS_DRM_NORMALMESSAGE("imcomplete dump since log msg buffer overwrite %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
}
#endif
return MOS_XE_SUCCESS;
}
/**
 * This dumps the timeline for each exec bo in the current execution;
 * pairs of execed_exec_queue_id & timeline_value are dumped.
*/
int
__mos_dump_bo_deps_map_xe(struct mos_linux_bo **bo,
int num_bo,
std::vector<mos_xe_exec_bo> &exec_list,
uint32_t curr_exec_queue_id,
std::map<uint32_t, struct mos_xe_context*> ctx_infos)
{
#if (_DEBUG || _RELEASE_INTERNAL)
if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
uint32_t exec_list_size = exec_list.size();
for (int i = 0; i < exec_list_size + num_bo; i++)
{
mos_xe_bo_gem *exec_bo_gem = nullptr;
uint32_t exec_flags = 0;
if (i < exec_list_size)
{
exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
exec_flags = exec_list[i].flags;
}
else
{
exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default.
}
if (exec_bo_gem)
{
if (exec_bo_gem->is_imported || exec_bo_gem->is_exported)
{
MOS_DRM_NORMALMESSAGE("\n\t\t\tdump external bo, handle=%d, without deps map, skip dump", exec_bo_gem->bo.handle);
}
else
{
char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
int offset = 0;
offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
MOS_MAX_MSG_BUF_SIZE - offset,
"\n\t\t\tdump %s dep: bo handle=%d, curr_exec_queue_id=%d, curr_op_flags=%d",
i >= exec_list_size ? "batch bo" : "exec bo",
exec_bo_gem->bo.handle,
curr_exec_queue_id,
exec_flags);
auto it = exec_bo_gem->read_deps.begin();
while (it != exec_bo_gem->read_deps.end())
{
if (ctx_infos.count(it->first) > 0)
{
offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
MOS_MAX_MSG_BUF_SIZE - offset,
"\n\t\t\t-read deps: execed_exec_queue_id=%d, syncobj_handle=%d", "timeline = %ld",
it->first,
it->second.dep ? it->second.dep->syncobj_handle : INVALID_HANDLE,
it->second.dep ? it->second.exec_timeline_index : INVALID_HANDLE);
}
it++;
}
it = exec_bo_gem->write_deps.begin();
while (it != exec_bo_gem->write_deps.end())
{
if (ctx_infos.count(it->first) > 0)
{
offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
MOS_MAX_MSG_BUF_SIZE - offset,
"\n\t\t\t-write deps: execed_exec_queue_id=%d, syncobj_handle=%d", "timeline = %ld",
it->first,
it->second.dep ? it->second.dep->syncobj_handle : INVALID_HANDLE,
it->second.dep ? it->second.exec_timeline_index : INVALID_HANDLE);
}
it++;
}
offset > MOS_MAX_MSG_BUF_SIZE ?
MOS_DRM_NORMALMESSAGE("imcomplete dump since log msg buffer overwrite %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
}
}
}
}
#endif
return MOS_XE_SUCCESS;
}
static int
__mos_context_exec_update_syncs_xe(struct mos_xe_bufmgr_gem *bufmgr_gem,
struct mos_linux_bo **bo,
int num_bo,
struct mos_xe_context *ctx,
std::vector<mos_xe_exec_bo> &exec_list,
std::vector<struct drm_xe_sync> &syncs,
std::vector<struct mos_xe_external_bo_info> &external_bos)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
uint32_t curr_dummy_exec_queue_id = ctx->dummy_exec_queue_id;
uint32_t exec_list_size = exec_list.size();
int ret = 0;
std::set<uint32_t> exec_queue_ids;
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL);
MOS_XE_GET_KEYS_FROM_MAP(bufmgr_gem->global_ctx_info, exec_queue_ids);
for (int i = 0; i < exec_list_size + num_bo; i++)
{
mos_xe_bo_gem *exec_bo_gem = nullptr;
uint32_t exec_flags = 0;
if (i < exec_list_size)
{
//exec list bo
exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
exec_flags = exec_list[i].flags;
}
else
{
//batch bo
exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default
}
if (exec_bo_gem)
{
if (exec_flags == 0)
{
//Add an assert message here in case of potential thread safety issue.
//Currently, exec bo's flags could only be in (0, EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE]
MOS_DRM_ASSERTMESSAGE("Invalid op flags(0x0) for exec bo(handle=%d)", exec_bo_gem->bo.handle);
}
if (exec_bo_gem->is_imported || exec_bo_gem->is_exported)
{
                //external bo: need to export its syncobj every time.
int prime_fd = INVALID_HANDLE;
ret = mos_sync_update_exec_syncs_from_handle(
bufmgr_gem->fd,
exec_bo_gem->bo.handle,
exec_flags,
syncs,
prime_fd);
if (ret == MOS_XE_SUCCESS)
{
/**
* Note, must import batch syncobj for each external bo
* and close the syncobj created for them after exec submission.
*/
int count = syncs.size();
struct mos_xe_external_bo_info infos;
memclear(infos);
infos.syncobj_handle = syncs[count - 1].handle;
infos.prime_fd = prime_fd;
external_bos.push_back(infos);
}
else
{
                    //Note: continue processing even on failure.
                    //This may only cause a potential synchronization issue; don't crash the umd here.
MOS_DRM_ASSERTMESSAGE("Failed to update syncobj for external bo(%d)",
exec_bo_gem->bo.handle);
}
}
else
{
//internal bo
ret = mos_sync_update_exec_syncs_from_timeline_deps(
curr_dummy_exec_queue_id,
exec_bo_gem->last_exec_write_exec_queue,
exec_flags,
exec_queue_ids,
exec_bo_gem->read_deps,
exec_bo_gem->write_deps,
syncs);
}
}
}
return MOS_XE_SUCCESS;
}
static int
__mos_context_exec_update_bo_deps_xe(struct mos_linux_bo **bo,
int num_bo,
std::vector<mos_xe_exec_bo> &exec_list,
uint32_t curr_exec_queue_id,
struct mos_xe_dep *dep)
{
uint32_t exec_list_size = exec_list.size();
for (int i = 0; i < exec_list_size + num_bo; i++)
{
mos_xe_bo_gem *exec_bo_gem = nullptr;
uint32_t exec_flags = 0;
if (i < exec_list_size)
{
//exec list bo
exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
exec_flags = exec_list[i].flags;
}
else
{
//batch bo
exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default.
}
if (exec_bo_gem)
{
mos_sync_update_bo_deps(curr_exec_queue_id, exec_flags, dep, exec_bo_gem->read_deps, exec_bo_gem->write_deps);
if (exec_flags & EXEC_OBJECT_READ_XE)
{
exec_bo_gem->last_exec_read_exec_queue = curr_exec_queue_id;
}
if (exec_flags & EXEC_OBJECT_WRITE_XE)
{
exec_bo_gem->last_exec_write_exec_queue = curr_exec_queue_id;
}
}
}
return MOS_XE_SUCCESS;
}
/**
 * @ctx indicates the guilty ctx that needs to be recovered for re-submission
 * @exec indicates the exec data from the previous failed submission to re-submit
 * @curr_exec_queue_id indicates the guilty exec_queue_id; it will be replaced by a newly created one
*/
static int
__mos_bo_context_exec_retry_xe(struct mos_bufmgr *bufmgr,
struct mos_linux_context *ctx,
struct drm_xe_exec &exec,
uint32_t &curr_exec_queue_id)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
int ret = MOS_XE_SUCCESS;
    //query the ctx property first to check if the failure is caused by an exec_queue ban
uint64_t property_value = 0;
ret = __mos_get_context_property_xe(bufmgr, ctx, DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN, property_value);
/**
     * If the exec_queue is banned, the queried value is 1, otherwise it is zero;
     * if the exec failure is not caused by an exec_queue ban, the umd cannot help recover it.
*/
if (ret || !property_value)
{
MOS_DRM_ASSERTMESSAGE("Failed to retore ctx(%d) with error(%d)",
curr_exec_queue_id, -EPERM);
return -EPERM;
}
ret = __mos_context_restore_xe(bufmgr, ctx);
if (ret == MOS_XE_SUCCESS)
{
curr_exec_queue_id = ctx->ctx_id;
exec.exec_queue_id = curr_exec_queue_id;
//try once again to submit
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC, &exec);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("Failed to re-submission in DRM_IOCTL_XE_EXEC(errno:%d): new exec_queue_id = %d",
ret, curr_exec_queue_id);
}
}
else
{
MOS_DRM_ASSERTMESSAGE("Failed to retore context with error(%d), exec_queue_id = %d",
ret, curr_exec_queue_id);
}
return ret;
}
/**
 * @bo contains batch bos only.
 * @num_bo indicates the number of batch bos.
 * @ctx indicates the exec_queue context to execute on.
 * GPU<->GPU synchronization:
 * Exec must ensure GPU->GPU synchronization with the 8 steps below:
 * 1. Get the deps from read_deps and write_deps by checking each bo's op flags and add them into the syncs array;
 *     a) if flags & READ: get write_deps[last_write_exec_queue != ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY only;
 *     b) if flags & WRITE: get read_deps[all exec_queues excluding ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY
 *        and write_deps[last_write_exec_queue != ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY;
 * 2. Export a syncobj from each external bo as a dep and add it into the syncs array.
 * 3. Initialize a new timeline dep object for the exec_queue if it doesn't have one and add it to the syncs array,
 *    otherwise add the timeline dep from context->timeline_dep directly since it holds the latest available timeline point;
 * 4. Exec submission with batches and syncs.
 * 5. Update read_deps[ctx->dummy_exec_queue_id] and write_deps[ctx->dummy_exec_queue_id] with the new timeline dep;
 * 6. Update the timeline dep's timeline index to the latest available one for the current exec_queue.
 * 7. Import the syncobj from the batch bo into each external bo's DMA buffer so an external process can wait on the media process on demand.
 * 8. Close the syncobj handle and syncobj fd created for external bos to avoid leaks.
 * GPU->CPU(optional):
 * If bo->map_deps.dep exists:
 *    get it and add it to the exec syncs array.
*/
static int
mos_bo_context_exec_with_sync_xe(struct mos_linux_bo **bo, int num_bo, struct mos_linux_context *ctx,
struct drm_clip_rect *cliprects, int num_cliprects, int DR4,
unsigned int flags, int *fence)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL)
if (num_bo <= 0)
{
MOS_DRM_ASSERTMESSAGE("invalid batch bo num(%d)", num_bo);
return -EINVAL;
}
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo[0]->bufmgr;
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
uint64_t batch_addrs[num_bo];
std::vector<mos_xe_exec_bo> exec_list;
for (int i = 0; i < num_bo; i++)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bo[i], -EINVAL)
batch_addrs[i] = bo[i]->offset64;
struct mos_xe_bo_gem *batch_bo_gem = (struct mos_xe_bo_gem *) bo[i];
MOS_XE_GET_VALUES_FROM_MAP(batch_bo_gem->exec_list, exec_list);
}
struct mos_xe_context *context = (struct mos_xe_context *) ctx;
uint32_t curr_exec_queue_id = context->ctx.ctx_id;
std::vector<struct mos_xe_external_bo_info> external_bos;
std::vector<struct drm_xe_sync> syncs;
uint64_t curr_timeline = 0;
int ret = 0;
uint32_t exec_list_size = exec_list.size();
if (exec_list_size == 0)
{
MOS_DRM_NORMALMESSAGE("invalid exec list count(%d)", exec_list_size);
}
bufmgr_gem->m_lock.lock();
if (context->timeline_dep == nullptr)
{
context->timeline_dep = mos_sync_create_timeline_dep(bufmgr_gem->fd);
if (context->timeline_dep == nullptr)
{
MOS_DRM_ASSERTMESSAGE("Failed to initial context timeline dep");
bufmgr_gem->m_lock.unlock();
return -ENOMEM;
}
}
struct mos_xe_dep *dep = context->timeline_dep;
    //add the latest available timeline point (dep) into syncs as the fence-out point.
mos_sync_update_exec_syncs_from_timeline_dep(
bufmgr_gem->fd,
dep,
syncs);
bufmgr_gem->sync_obj_rw_lock.lock_shared();
    //update the exec syncs array with external and internal bo deps
__mos_context_exec_update_syncs_xe(
bufmgr_gem,
bo,
num_bo,
context,
exec_list,
syncs,
external_bos);
//exec submit
uint32_t sync_count = syncs.size();
struct drm_xe_sync *syncs_array = syncs.data();
//dump bo deps map
__mos_dump_bo_deps_map_xe(bo, num_bo, exec_list, curr_exec_queue_id, bufmgr_gem->global_ctx_info);
//dump fence in and fence out info
__mos_dump_syncs_array_xe(syncs_array, sync_count, dep);
struct drm_xe_exec exec;
memclear(exec);
exec.extensions = 0;
exec.exec_queue_id = curr_exec_queue_id;
exec.num_syncs = sync_count;
exec.syncs = (uintptr_t)syncs_array;
/**
     * exec.address accepts the single batch offset64 when num_bo == 1,
     * and a pointer to the batch address array when num_bo > 1.
*/
exec.address = (num_bo == 1 ? (uintptr_t)batch_addrs[0] : (uintptr_t)batch_addrs);
exec.num_batch_buffer = num_bo;
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC, &exec);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("Failed to submission in DRM_IOCTL_XE_EXEC(errno:%d): exec_queue_id = %d, num_syncs = %d, num_bo = %d",
-errno, curr_exec_queue_id, sync_count, num_bo);
        //check if it is caused by a banned (guilty) exec_queue; if so, restore the exec_queue here and retry the exec.
if (ret == -EPERM)
{
ret = __mos_bo_context_exec_retry_xe(&bufmgr_gem->bufmgr, ctx, exec, curr_exec_queue_id);
}
}
curr_timeline = dep->timeline_index;
//update bos' read and write dep with new timeline
__mos_context_exec_update_bo_deps_xe(bo, num_bo, exec_list, context->dummy_exec_queue_id, dep);
//Update dep with latest available timeline
mos_sync_update_timeline_dep(dep);
bufmgr_gem->sync_obj_rw_lock.unlock_shared();
bufmgr_gem->m_lock.unlock();
    //import the batch syncobj or its timeline point for external bos and close the syncobjs created for them earlier.
uint32_t external_bo_count = external_bos.size();
int sync_file_fd = INVALID_HANDLE;
int temp_syncobj = INVALID_HANDLE;
if (external_bo_count > 0)
{
temp_syncobj = mos_sync_syncobj_create(bufmgr_gem->fd, 0);
if (temp_syncobj > 0)
{
mos_sync_syncobj_timeline_to_binary(bufmgr_gem->fd, temp_syncobj, dep->syncobj_handle, curr_timeline, 0);
sync_file_fd = mos_sync_syncobj_handle_to_syncfile_fd(bufmgr_gem->fd, temp_syncobj);
}
}
for (int i = 0; i < external_bo_count; i++)
{
//import syncobj for external bos
if (sync_file_fd >= 0)
{
mos_sync_import_syncfile_to_external_bo(bufmgr_gem->fd, external_bos[i].prime_fd, sync_file_fd);
}
if (external_bos[i].prime_fd != INVALID_HANDLE)
{
close(external_bos[i].prime_fd);
}
mos_sync_syncobj_destroy(bufmgr_gem->fd, external_bos[i].syncobj_handle);
}
if (sync_file_fd >= 0)
{
close(sync_file_fd);
}
if (temp_syncobj > 0)
{
mos_sync_syncobj_destroy(bufmgr_gem->fd, temp_syncobj);
}
//Note: keep exec return value for final return value.
return ret;
}
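/**
 * A minimal submission sketch (illustrative only, excluded from the build): submit a single
 * batch bo on an exec_queue context and then wait for it. The example function name is an
 * assumption; the batch bo and context are assumed to have been created through this bufmgr,
 * with dependent resources attached to the batch via bo_add_softpin_target beforehand.
 */
#if 0
static int example_submit_batch(struct mos_linux_bo *batch, struct mos_linux_context *ctx)
{
    struct mos_linux_bo *batches[1] = { batch };
    //cliprects/num_cliprects/DR4/flags/fence are unused by this Xe backend and passed as zero.
    int ret = mos_bo_context_exec_with_sync_xe(batches, 1, ctx, nullptr, 0, 0, 0, nullptr);
    if (ret == MOS_XE_SUCCESS)
    {
        //Block until the GPU has consumed the batch (its timeline deps are signaled).
        mos_gem_bo_wait_rendering_xe(batch);
    }
    return ret;
}
#endif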
/**
* Get the DEVICE ID for the device. This can be overridden by setting the
* INTEL_DEVID_OVERRIDE environment variable to the desired ID.
*/
static int
mos_get_devid_xe(struct mos_bufmgr *bufmgr)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
int fd = bufmgr_gem->fd;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, 0)
struct drm_xe_query_config *config = dev->config;
return (config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff);
}
static struct drm_xe_query_engines *
__mos_query_engines_xe(int fd)
{
if (fd < 0)
{
return nullptr;
}
struct drm_xe_device_query query;
struct drm_xe_query_engines *engines;
int ret;
memclear(query);
query.extensions = 0;
query.query = DRM_XE_DEVICE_QUERY_ENGINES;
query.size = 0;
query.data = 0;
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
if (ret || !query.size)
{
MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
return nullptr;
}
engines = (drm_xe_query_engines *)calloc(1, query.size);
MOS_DRM_CHK_NULL_RETURN_VALUE(engines, nullptr)
query.data = (uintptr_t)engines;
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
if (ret || !query.size)
{
MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
MOS_XE_SAFE_FREE(engines);
return nullptr;
}
return engines;
}
static int
mos_query_engines_count_xe(struct mos_bufmgr *bufmgr, unsigned int *nengine)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(nengine, -EINVAL);
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
int fd = bufmgr_gem->fd;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
*nengine = dev->engines->num_engines;
return MOS_XE_SUCCESS;
}
int
mos_query_engines_xe(struct mos_bufmgr *bufmgr,
__u16 engine_class,
__u64 caps,
unsigned int *nengine,
void *engine_map)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(nengine, -EINVAL);
MOS_DRM_CHK_NULL_RETURN_VALUE(engine_map, -EINVAL);
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct drm_xe_engine_class_instance *ci = (struct drm_xe_engine_class_instance *)engine_map;
int fd = bufmgr_gem->fd;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
struct drm_xe_query_engines *engines = dev->engines;
int i, num;
struct drm_xe_engine *engine;
for (i = 0, num = 0; i < engines->num_engines; i++)
{
engine = (struct drm_xe_engine *)&engines->engines[i];
if (engine_class == engine->instance.engine_class)
{
ci->engine_class = engine_class;
ci->engine_instance = engine->instance.engine_instance;
ci->gt_id = engine->instance.gt_id;
ci++;
num++;
}
if (num > *nengine)
{
MOS_DRM_ASSERTMESSAGE("Number of engine instances out of range, %d,%d", num, *nengine);
return -1;
}
}
//Note30: need to confirm if engine_instance is ordered, otherwise re-order needed.
*nengine = num;
return 0;
}
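/**
 * A minimal query sketch (illustrative only, excluded from the build): enumerate all
 * video-decode engine instances via the count/query two-step pattern above. The example
 * function name is an assumption; @bufmgr is assumed to come from mos_bufmgr_gem_init_xe.
 */
#if 0
static void example_list_vcs_engines(struct mos_bufmgr *bufmgr)
{
    unsigned int nengine = 0;
    if (mos_query_engines_count_xe(bufmgr, &nengine) != MOS_XE_SUCCESS || nengine == 0)
    {
        return;
    }
    //Allocate for the worst case (all engines); the query filters by class.
    struct drm_xe_engine_class_instance *ci =
        (struct drm_xe_engine_class_instance *)calloc(nengine, sizeof(*ci));
    if (nullptr == ci)
    {
        return;
    }
    //On success, nengine is updated to the number of matching instances.
    if (mos_query_engines_xe(bufmgr, DRM_XE_ENGINE_CLASS_VIDEO_DECODE, 0, &nengine, ci) == 0)
    {
        for (unsigned int i = 0; i < nengine; i++)
        {
            MOS_DRM_NORMALMESSAGE("vcs instance %d on gt %d", ci[i].engine_instance, ci[i].gt_id);
        }
    }
    free(ci);
}
#endif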
static size_t
mos_get_engine_class_size_xe()
{
return sizeof(struct drm_xe_engine_class_instance);
}
static int
mos_query_sysinfo_xe(struct mos_bufmgr *bufmgr, MEDIA_SYSTEM_INFO* gfx_info)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
MOS_DRM_CHK_NULL_RETURN_VALUE(gfx_info, -EINVAL);
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
int fd = bufmgr_gem->fd;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
int ret;
MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
if (0 == gfx_info->VDBoxInfo.NumberOfVDBoxEnabled
|| 0 == gfx_info->VEBoxInfo.NumberOfVEBoxEnabled)
{
unsigned int num_vd = 0;
unsigned int num_ve = 0;
for (unsigned int i = 0; i < dev->engines->num_engines; i++)
{
if (0 == gfx_info->VDBoxInfo.NumberOfVDBoxEnabled
&& dev->engines->engines[i].instance.engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE)
{
gfx_info->VDBoxInfo.Instances.VDBoxEnableMask |=
1 << dev->engines->engines[i].instance.engine_instance;
num_vd++;
}
if (0 == gfx_info->VEBoxInfo.NumberOfVEBoxEnabled
&& dev->engines->engines[i].instance.engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE)
{
num_ve++;
}
}
if (num_vd > 0)
{
gfx_info->VDBoxInfo.NumberOfVDBoxEnabled = num_vd;
}
        if (num_ve > 0)
{
gfx_info->VEBoxInfo.NumberOfVEBoxEnabled = num_ve;
}
}
return 0;
}
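/**
 * Worked example for the selection below (debug builds only): with *nengine == 4 and
 * fixed_instance_mask == 0x5, entries 0 and 2 of engine_map are kept and compacted to
 * indices 0 and 1, the remaining entries are zeroed, and *nengine is updated to 2.
 * A zero mask leaves the engine map and count untouched.
 */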
void mos_select_fixed_engine_xe(struct mos_bufmgr *bufmgr,
void *engine_map,
uint32_t *nengine,
uint32_t fixed_instance_mask)
{
MOS_UNUSED(bufmgr);
#if (_DEBUG || _RELEASE_INTERNAL)
if (fixed_instance_mask)
{
struct drm_xe_engine_class_instance *_engine_map = (struct drm_xe_engine_class_instance *)engine_map;
auto unselect_index = 0;
for (auto bit = 0; bit < *nengine; bit++)
{
if (((fixed_instance_mask >> bit) & 0x1) && (bit > unselect_index))
{
_engine_map[unselect_index].engine_class = _engine_map[bit].engine_class;
_engine_map[unselect_index].engine_instance = _engine_map[bit].engine_instance;
_engine_map[unselect_index].gt_id = _engine_map[bit].gt_id;
_engine_map[unselect_index].pad = _engine_map[bit].pad;
_engine_map[bit].engine_class = 0;
_engine_map[bit].engine_instance = 0;
_engine_map[bit].gt_id = 0;
_engine_map[bit].pad = 0;
unselect_index++;
}
else if (((fixed_instance_mask >> bit) & 0x1) && (bit == unselect_index))
{
unselect_index++;
}
else if (!((fixed_instance_mask >> bit) & 0x1))
{
_engine_map[bit].engine_class = 0;
_engine_map[bit].engine_instance = 0;
_engine_map[bit].gt_id = 0;
_engine_map[bit].pad = 0;
}
}
*nengine = unselect_index;
}
#else
MOS_UNUSED(engine_map);
MOS_UNUSED(nengine);
MOS_UNUSED(fixed_instance_mask);
#endif
}
/**
* Note: xe kmd doesn't support query blob before dg2.
*/
static uint32_t *
__mos_query_hw_config_xe(int fd)
{
struct drm_xe_device_query query;
uint32_t *hw_config;
int ret;
if (fd < 0)
{
return nullptr;
}
memclear(query);
query.query = DRM_XE_DEVICE_QUERY_HWCONFIG;
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
if (ret || !query.size)
{
MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
return nullptr;
}
hw_config = (uint32_t *)calloc(1, query.size + sizeof(uint32_t));
MOS_DRM_CHK_NULL_RETURN_VALUE(hw_config, nullptr)
query.data = (uintptr_t)&hw_config[1];
ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
if (ret != 0 || query.size <= 0)
{
MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
MOS_XE_SAFE_FREE(hw_config);
return nullptr;
}
hw_config[0] = query.size / sizeof(uint32_t);
return hw_config;
}
static int
mos_query_device_blob_xe(struct mos_bufmgr *bufmgr, MEDIA_SYSTEM_INFO* gfx_info)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(gfx_info, -EINVAL)
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
int fd = bufmgr_gem->fd;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
MOS_DRM_CHK_XE_DEV(dev, hw_config, __mos_query_hw_config_xe, -ENODEV)
uint32_t *hwconfig = &dev->hw_config[1];
uint32_t num_config = dev->hw_config[0];
int i = 0;
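    /**
     * The hwconfig blob is a flat dword stream of (attribute id, value length in dwords,
     * value dwords...) tuples; the loop below therefore advances by the value length plus
     * 2 header dwords per attribute.
     */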
while (i < num_config) {
/* Attribute ID starts with 1 */
assert(hwconfig[i] > 0);
#if DEBUG_BLOB_QUERY
MOS_DRM_NORMALMESSAGE("query blob: key=%s, value=%d", key_string[hwconfig[i]], hwconfig[i+2]);
#endif
if (INTEL_HWCONFIG_MAX_SLICES_SUPPORTED == hwconfig[i])
{
assert(hwconfig[i+1] == 1);
gfx_info->SliceCount = hwconfig[i+2];
gfx_info->MaxSlicesSupported = hwconfig[i+2];
}
if ((INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED == hwconfig[i])
|| (INTEL_HWCONFIG_MAX_SUBSLICE == hwconfig[i]))
{
assert(hwconfig[i+1] == 1);
gfx_info->SubSliceCount = hwconfig[i+2];
gfx_info->MaxSubSlicesSupported = hwconfig[i+2];
}
if ((INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS == hwconfig[i])
|| (INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE == hwconfig[i]))
{
assert(hwconfig[i+1] == 1);
gfx_info->MaxEuPerSubSlice = hwconfig[i+2];
}
if (INTEL_HWCONFIG_DEPRECATED_L3_CACHE_SIZE_IN_KB == hwconfig[i])
{
assert(hwconfig[i+1] == 1);
gfx_info->L3CacheSizeInKb = hwconfig[i+2];
}
if (INTEL_HWCONFIG_NUM_THREADS_PER_EU == hwconfig[i])
{
assert(hwconfig[i+1] == 1);
gfx_info->NumThreadsPerEu = hwconfig[i+2];
}
if (INTEL_HWCONFIG_MAX_VECS == hwconfig[i])
{
assert(hwconfig[i+1] == 1);
gfx_info->MaxVECS = hwconfig[i+2];
}
        /* Advance to the next attribute */
        i += hwconfig[i + 1]; // skip the value dwords
        i += 2;               // skip the key and value-length dwords
}
return 0;
}
static void
mos_enable_reuse_xe(struct mos_bufmgr *bufmgr)
{
MOS_UNIMPLEMENT(bufmgr);
}
// This function is not supported by the Xe KMD
static int mos_query_hw_ip_version_xe(struct mos_bufmgr *bufmgr, __u16 engine_class, void *ip_ver_info)
{
MOS_UNIMPLEMENT(bufmgr);
MOS_UNIMPLEMENT(engine_class);
MOS_UNIMPLEMENT(ip_ver_info);
return 0;
}
static void
mos_bo_free_xe(struct mos_linux_bo *bo)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = nullptr;
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
struct drm_gem_close close_ioctl;
int ret;
if (nullptr == bo_gem)
{
MOS_DRM_ASSERTMESSAGE("bo == nullptr");
return;
}
bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
if (nullptr == bufmgr_gem)
{
MOS_DRM_ASSERTMESSAGE("bufmgr_gem == nullptr");
return;
}
mos_gem_bo_wait_rendering_xe(bo);
bufmgr_gem->m_lock.lock();
if (!bo_gem->is_userptr)
{
if (bo_gem->mem_virtual)
{
VG(VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, 0));
drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
bo_gem->mem_virtual = nullptr;
}
}
if (bo->vm_id != INVALID_VM)
{
ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
bo->vm_id,
0,
0,
bo->offset64,
bo->size,
bo_gem->pat_index,
DRM_XE_VM_BIND_OP_UNMAP);
if (ret)
{
MOS_DRM_ASSERTMESSAGE("mos_gem_bo_free mos_vm_unbind ret error. bo:0x%lx, vm_id:%d\r",
(uint64_t)bo,
bo->vm_id);
}
else
{
bo->vm_id = INVALID_VM;
}
}
if (!bo_gem->is_userptr)
{
/* Close this object */
memclear(close_ioctl);
close_ioctl.handle = bo_gem->gem_handle;
ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_ioctl);
if (ret != 0)
{
MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s",
bo_gem->gem_handle, bo_gem->name, strerror(errno));
}
}
if (bufmgr_gem->mem_profiler_fd != -1)
{
snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CLOSE, %d, %d, %lu, %d\n", getpid(), bo->handle,bo->size,bo_gem->mem_region);
ret = write(bufmgr_gem->mem_profiler_fd, bufmgr_gem->mem_profiler_buffer, strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
if (-1 == ret)
{
snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CLOSE, %d, %d, %lu, %d\n", getpid(), bo->handle,bo->size,bo_gem->mem_region);
ret = write(bufmgr_gem->mem_profiler_fd, bufmgr_gem->mem_profiler_buffer, strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
if (-1 == ret)
{
MOS_DRM_ASSERTMESSAGE("Failed to write to %s: %s", bufmgr_gem->mem_profiler_path, strerror(errno));
}
}
}
/* Return the VMA for reuse */
__mos_bo_vma_free_xe(bo->bufmgr, bo->offset64, bo->size);
bufmgr_gem->m_lock.unlock();
MOS_Delete(bo_gem);
}
static int
mos_bo_set_softpin_xe(MOS_LINUX_BO *bo)
{
MOS_UNIMPLEMENT(bo);
return 0;
}
static void
mos_bufmgr_gem_destroy_xe(struct mos_bufmgr *bufmgr)
{
if (nullptr == bufmgr)
return;
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
int i, ret;
    /* Release the VMA heaps used for GPU virtual address management. */
mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_SYS]);
mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_DEVICE]);
mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_PRIME]);
if (bufmgr_gem->vm_id != INVALID_VM)
{
__mos_vm_destroy_xe(bufmgr, bufmgr_gem->vm_id);
bufmgr_gem->vm_id = INVALID_VM;
}
if (bufmgr_gem->mem_profiler_fd != -1)
{
close(bufmgr_gem->mem_profiler_fd);
}
MOS_XE_SAFE_FREE(dev->hw_config);
dev->hw_config = nullptr;
MOS_XE_SAFE_FREE(dev->config);
dev->config = nullptr;
MOS_XE_SAFE_FREE(dev->engines);
dev->engines = nullptr;
MOS_XE_SAFE_FREE(dev->mem_regions);
dev->mem_regions = nullptr;
MOS_XE_SAFE_FREE(dev->gt_list);
dev->gt_list = nullptr;
MOS_Delete(bufmgr_gem);
}
static void
mos_bufmgr_gem_unref_xe(struct mos_bufmgr *bufmgr)
{
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
if (bufmgr_gem && atomic_add_unless(&bufmgr_gem->ref_count, -1, 1))
{
pthread_mutex_lock(&bufmgr_list_mutex);
if (atomic_dec_and_test(&bufmgr_gem->ref_count))
{
DRMLISTDEL(&bufmgr_gem->managers);
mos_bufmgr_gem_destroy_xe(bufmgr);
}
pthread_mutex_unlock(&bufmgr_list_mutex);
}
}
static int
mo_get_context_param_xe(struct mos_linux_context *ctx,
uint32_t size,
uint64_t param,
uint64_t *value)
{
MOS_UNIMPLEMENT(ctx);
MOS_UNIMPLEMENT(size);
MOS_UNIMPLEMENT(param);
MOS_UNIMPLEMENT(value);
return 0;
}
static void mos_enable_softpin_xe(struct mos_bufmgr *bufmgr, bool va1m_align)
{
MOS_UNIMPLEMENT(bufmgr);
MOS_UNIMPLEMENT(va1m_align);
}
static int
mos_get_reset_stats_xe(struct mos_linux_context *ctx,
uint32_t *reset_count,
uint32_t *active,
uint32_t *pending)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
struct mos_xe_context *context = (struct mos_xe_context *)ctx;
if (reset_count)
*reset_count = context->reset_count;
if (active)
*active = 0;
if (pending)
*pending = 0;
return 0;
}
static mos_oca_exec_list_info*
mos_bo_get_oca_exec_list_info_xe(struct mos_linux_bo *bo, int *count)
{
if (nullptr == bo || nullptr == count)
{
return nullptr;
}
mos_oca_exec_list_info *info = nullptr;
int counter = 0;
int MAX_COUNT = 50;
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *)bo;
int exec_list_count = bo_gem->exec_list.size();
if (exec_list_count == 0 || exec_list_count > MAX_COUNT)
{
return nullptr;
}
info = (mos_oca_exec_list_info *)malloc((exec_list_count + 1) * sizeof(mos_oca_exec_list_info));
if (!info)
{
MOS_DRM_ASSERTMESSAGE("malloc mos_oca_exec_list_info failed");
return info;
}
for (auto &it : bo_gem->exec_list)
{
/*note: set capture for each bo*/
struct mos_xe_bo_gem *exec_bo_gem = (struct mos_xe_bo_gem *)it.second.bo;
uint32_t exec_flags = it.second.flags;
if (exec_bo_gem)
{
info[counter].handle = exec_bo_gem->bo.handle;
info[counter].size = exec_bo_gem->bo.size;
info[counter].offset64 = exec_bo_gem->bo.offset64;
info[counter].flags = exec_flags;
info[counter].mem_region = exec_bo_gem->mem_region;
info[counter].is_batch = false;
counter++;
}
}
/*note: bo is cmd bo, also need to be added*/
info[counter].handle = bo->handle;
info[counter].size = bo->size;
info[counter].offset64 = bo->offset64;
info[counter].flags = EXEC_OBJECT_WRITE_XE; // use write flags for batch bo as default.
info[counter].mem_region = bo_gem->mem_region;
info[counter].is_batch = true;
counter++;
*count = counter;
return info;
}
static bool
mos_has_bsd2_xe(struct mos_bufmgr *bufmgr)
{
MOS_UNUSED(bufmgr);
return true;
}
static void
mos_bo_set_object_capture_xe(struct mos_linux_bo *bo)
{
MOS_UNIMPLEMENT(bo);
}
static void
mos_bo_set_object_async_xe(struct mos_linux_bo *bo)
{
MOS_UNIMPLEMENT(bo);
}
static int
mos_get_driver_info_xe(struct mos_bufmgr *bufmgr, struct LinuxDriverInfo *drvInfo)
{
MOS_DRM_CHK_NULL_RETURN_VALUE(drvInfo, -EINVAL)
struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
struct mos_xe_device *dev = &bufmgr_gem->xe_device;
int fd = bufmgr_gem->fd;
uint32_t MaxEuPerSubSlice = 0;
int i = 0;
drvInfo->hasBsd = 1;
drvInfo->hasBsd2 = 1;
drvInfo->hasVebox = 1;
//For XE driver always has ppgtt
drvInfo->hasPpgtt = 1;
/**
* query blob
* Note: xe kmd doesn't support query blob before dg2, so don't check null and return here.
*/
if (dev->hw_config == nullptr)
{
dev->hw_config = __mos_query_hw_config_xe(fd);
}
if (dev->hw_config)
{
uint32_t *hw_config = &dev->hw_config[1];
uint32_t num_config = dev->hw_config[0];
while (i < num_config)
{
/* Attribute ID starts with 1 */
assert(hw_config[i] > 0);
#if DEBUG_BLOB_QUERY
MOS_DRM_NORMALMESSAGE("query blob: key=%s, value=%d", key_string[hw_config[i]], hw_config[i+2]);
#endif
if (INTEL_HWCONFIG_MAX_SLICES_SUPPORTED == hw_config[i])
{
assert(hw_config[i+1] == 1);
drvInfo->sliceCount = hw_config[i+2];
}
if ((INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED == hw_config[i])
|| (INTEL_HWCONFIG_MAX_SUBSLICE == hw_config[i]))
{
assert(hw_config[i+1] == 1);
drvInfo->subSliceCount = hw_config[i+2];
}
if ((INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS == hw_config[i])
|| (INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE == hw_config[i]))
{
assert(hw_config[i+1] == 1);
MaxEuPerSubSlice = hw_config[i+2];
}
            /* Advance to the next attribute */
            i += hw_config[i + 1]; // skip the value dwords
            i += 2;                // skip the key and value-length dwords
}
drvInfo->euCount = drvInfo->subSliceCount * MaxEuPerSubSlice;
}
else
{
drvInfo->euCount = 96;
drvInfo->subSliceCount = 6;
drvInfo->sliceCount = 1;
}
// query engines info
MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
struct drm_xe_query_engines *engines = dev->engines;
int num_vd = 0;
int num_ve = 0;
for (i = 0; i < engines->num_engines; i++)
{
if (DRM_XE_ENGINE_CLASS_VIDEO_DECODE == engines->engines[i].instance.engine_class)
{
num_vd++;
}
else if (DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE == engines->engines[i].instance.engine_class)
{
num_ve++;
}
}
if (num_vd >= 1)
{
drvInfo->hasBsd = 1;
}
if (num_vd >= 2)
{
drvInfo->hasBsd2 = 1;
}
if (num_ve >= 1)
{
drvInfo->hasVebox = 1;
}
drvInfo->hasHuc = 1;
if (1 == drvInfo->hasHuc)
{
drvInfo->hasProtectedHuc = 1;
}
// query config
MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, -ENODEV)
struct drm_xe_query_config *config = dev->config;
drvInfo->devId = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff;
drvInfo->devRev = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16;
return MOS_XE_SUCCESS;
}
/**
* Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects (BOs).
*
* \param fd File descriptor of the opened DRM device.
*/
struct mos_bufmgr *
mos_bufmgr_gem_init_xe(int fd, int batch_size)
{
    //Note: don't put this field in bufmgr since bufmgr may be inaccessible in some functions
#if (_DEBUG || _RELEASE_INTERNAL)
MOS_READ_ENV_VARIABLE(INTEL_XE_BUFMGR_DEBUG, MOS_USER_FEATURE_VALUE_TYPE_INT64, __xe_bufmgr_debug__);
if (__xe_bufmgr_debug__ < 0)
{
__xe_bufmgr_debug__ = 0;
}
#endif
struct mos_xe_bufmgr_gem *bufmgr_gem;
int ret, tmp;
struct mos_xe_device *dev = nullptr;
pthread_mutex_lock(&bufmgr_list_mutex);
bufmgr_gem = mos_bufmgr_gem_find(fd);
if (bufmgr_gem)
goto exit;
bufmgr_gem = MOS_New(mos_xe_bufmgr_gem);
if (nullptr == bufmgr_gem)
goto exit;
bufmgr_gem->bufmgr = {};
bufmgr_gem->xe_device = {};
dev = &bufmgr_gem->xe_device;
bufmgr_gem->fd = fd;
bufmgr_gem->vm_id = INVALID_VM;
atomic_set(&bufmgr_gem->ref_count, 1);
bufmgr_gem->bufmgr.vm_create = mos_vm_create_xe;
bufmgr_gem->bufmgr.vm_destroy = mos_vm_destroy_xe;
bufmgr_gem->bufmgr.context_create = mos_context_create_xe;
bufmgr_gem->bufmgr.context_create_ext = mos_context_create_ext_xe;
bufmgr_gem->bufmgr.context_create_shared = mos_context_create_shared_xe;
bufmgr_gem->bufmgr.context_destroy = mos_context_destroy_xe;
bufmgr_gem->bufmgr.bo_alloc = mos_bo_alloc_xe;
bufmgr_gem->bufmgr.bo_add_softpin_target = mos_gem_bo_update_exec_list_xe;
bufmgr_gem->bufmgr.bo_clear_relocs = mos_gem_bo_clear_exec_list_xe;
bufmgr_gem->bufmgr.bo_alloc_userptr = mos_bo_alloc_userptr_xe;
bufmgr_gem->bufmgr.bo_alloc_tiled = mos_bo_alloc_tiled_xe;
bufmgr_gem->bufmgr.bo_map = mos_bo_map_xe;
bufmgr_gem->bufmgr.bo_busy = mos_gem_bo_busy_xe;
bufmgr_gem->bufmgr.bo_wait_rendering = mos_gem_bo_wait_rendering_xe;
bufmgr_gem->bufmgr.bo_wait = mos_gem_bo_wait_xe;
bufmgr_gem->bufmgr.bo_map_wc = mos_bo_map_wc_xe;
bufmgr_gem->bufmgr.bo_unmap = mos_bo_unmap_xe;
bufmgr_gem->bufmgr.bo_unmap_wc = mos_bo_unmap_wc_xe;
bufmgr_gem->bufmgr.bo_create_from_prime = mos_bo_create_from_prime_xe;
bufmgr_gem->bufmgr.bo_export_to_prime = mos_bo_export_to_prime_xe;
bufmgr_gem->bufmgr.get_devid = mos_get_devid_xe;
bufmgr_gem->bufmgr.query_engines_count = mos_query_engines_count_xe;
bufmgr_gem->bufmgr.query_engines = mos_query_engines_xe;
bufmgr_gem->bufmgr.get_engine_class_size = mos_get_engine_class_size_xe;
bufmgr_gem->bufmgr.query_sys_engines = mos_query_sysinfo_xe;
bufmgr_gem->bufmgr.select_fixed_engine = mos_select_fixed_engine_xe;
bufmgr_gem->bufmgr.query_device_blob = mos_query_device_blob_xe;
bufmgr_gem->bufmgr.get_driver_info = mos_get_driver_info_xe;
bufmgr_gem->bufmgr.destroy = mos_bufmgr_gem_unref_xe;
bufmgr_gem->bufmgr.query_hw_ip_version = mos_query_hw_ip_version_xe;
bufmgr_gem->bufmgr.get_platform_information = mos_get_platform_information_xe;
bufmgr_gem->bufmgr.set_platform_information = mos_set_platform_information_xe;
bufmgr_gem->bufmgr.enable_reuse = mos_enable_reuse_xe;
bufmgr_gem->bufmgr.bo_reference = mos_bo_reference_xe;
bufmgr_gem->bufmgr.bo_unreference = mos_bo_unreference_xe;
bufmgr_gem->bufmgr.bo_set_softpin = mos_bo_set_softpin_xe;
bufmgr_gem->bufmgr.enable_softpin = mos_enable_softpin_xe;
bufmgr_gem->bufmgr.get_context_param = mo_get_context_param_xe;
bufmgr_gem->bufmgr.get_reset_stats = mos_get_reset_stats_xe;
bufmgr_gem->bufmgr.bo_get_softpin_targets_info = mos_bo_get_oca_exec_list_info_xe;
    bufmgr_gem->bufmgr.has_bsd2 = mos_has_bsd2_xe;
bufmgr_gem->bufmgr.set_object_capture = mos_bo_set_object_capture_xe;
bufmgr_gem->bufmgr.set_object_async = mos_bo_set_object_async_xe;
bufmgr_gem->bufmgr.bo_context_exec3 = mos_bo_context_exec_with_sync_xe;
bufmgr_gem->exec_queue_timeslice = EXEC_QUEUE_TIMESLICE_DEFAULT;
MOS_READ_ENV_VARIABLE(INTEL_ENGINE_TIMESLICE, MOS_USER_FEATURE_VALUE_TYPE_INT32, bufmgr_gem->exec_queue_timeslice);
if (bufmgr_gem->exec_queue_timeslice <= 0
|| bufmgr_gem->exec_queue_timeslice >= EXEC_QUEUE_TIMESLICE_MAX)
{
bufmgr_gem->exec_queue_timeslice = EXEC_QUEUE_TIMESLICE_DEFAULT;
}
bufmgr_gem->mem_profiler_fd = -1;
bufmgr_gem->mem_profiler_path = getenv("MEDIA_MEMORY_PROFILER_LOG");
if (bufmgr_gem->mem_profiler_path != nullptr)
{
if (strcmp(bufmgr_gem->mem_profiler_path, "/sys/kernel/debug/tracing/trace_marker") == 0)
{
ret = bufmgr_gem->mem_profiler_fd = open(bufmgr_gem->mem_profiler_path, O_WRONLY );
}
else
{
ret = bufmgr_gem->mem_profiler_fd = open(bufmgr_gem->mem_profiler_path, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
}
if ( -1 == ret)
{
MOS_DRM_ASSERTMESSAGE("Failed to open %s: %s", bufmgr_gem->mem_profiler_path, strerror(errno));
}
}
dev->uc_versions[UC_TYPE_GUC_SUBMISSION].uc_type = UC_TYPE_INVALID;
dev->uc_versions[UC_TYPE_HUC].uc_type = UC_TYPE_INVALID;
bufmgr_gem->vm_id = __mos_vm_create_xe(&bufmgr_gem->bufmgr);
__mos_query_mem_regions_instance_mask_xe(&bufmgr_gem->bufmgr);
__mos_has_vram_xe(&bufmgr_gem->bufmgr);
__mos_get_default_alignment_xe(&bufmgr_gem->bufmgr);
DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
DRMINITLISTHEAD(&bufmgr_gem->named);
mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_SYS], MEMZONE_SYS_START, MEMZONE_SYS_SIZE);
mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_DEVICE], MEMZONE_DEVICE_START, MEMZONE_DEVICE_SIZE);
mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_PRIME], MEMZONE_PRIME_START, MEMZONE_PRIME_SIZE);
exit:
pthread_mutex_unlock(&bufmgr_list_mutex);
return bufmgr_gem != nullptr ? &bufmgr_gem->bufmgr : nullptr;
}
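/**
 * A minimal lifecycle sketch (illustrative only, excluded from the build): open a DRM render
 * node, create the Xe bufmgr, and release it through its destroy callback, which drops the
 * reference taken by init. The example function name and the device path are assumptions for
 * illustration; callers normally receive an already-opened fd.
 */
#if 0
static void example_bufmgr_lifecycle(void)
{
    int fd = open("/dev/dri/renderD128", O_RDWR);
    if (fd < 0)
    {
        return;
    }
    struct mos_bufmgr *bufmgr = mos_bufmgr_gem_init_xe(fd, 0);
    if (bufmgr != nullptr)
    {
        // ... allocate bos, create contexts, submit work ...
        bufmgr->destroy(bufmgr); //mos_bufmgr_gem_unref_xe
    }
    close(fd);
}
#endif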
int mos_get_dev_id_xe(int fd, uint32_t *device_id)
{
if (fd < 0 || nullptr == device_id)
{
return -EINVAL;
}
struct drm_xe_query_config *config = __mos_query_config_xe(fd);
MOS_DRM_CHK_NULL_RETURN_VALUE(config, -ENODEV)
*device_id = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff;
MOS_XE_SAFE_FREE(config);
return MOS_XE_SUCCESS;
}