From e11e7b9b94b06f1e0e0ce2fa329075825a26d179 Mon Sep 17 00:00:00 2001 From: John Falkowski Date: Fri, 24 Jan 2025 03:59:30 +0000 Subject: [PATCH] feature: Add shared System USM Allocation in support of appendLaunchKernel Related-To: NEO-12988 Signed-off-by: John Falkowski --- shared/source/os_interface/linux/drm_neo.cpp | 27 +++++++ shared/source/os_interface/linux/drm_neo.h | 5 +- .../source/os_interface/linux/ioctl_helper.h | 5 +- .../os_interface/linux/ioctl_helper_i915.cpp | 5 +- .../os_interface/linux/xe/ioctl_helper_xe.cpp | 42 +++++++--- .../os_interface/linux/xe/ioctl_helper_xe.h | 3 +- shared/test/common/libult/linux/drm_mock.h | 2 + .../linux/xe/ioctl_helper_xe_tests.cpp | 81 +++++++++++++++++++ 8 files changed, 153 insertions(+), 17 deletions(-) diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index 2df251e11a..a9250f368a 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -45,6 +45,7 @@ #include "shared/source/os_interface/product_helper.h" #include "shared/source/release_helper/release_helper.h" #include "shared/source/utilities/api_intercept.h" +#include "shared/source/utilities/cpu_info.h" #include "shared/source/utilities/directory.h" #include "shared/source/utilities/io_functions.h" @@ -55,6 +56,10 @@ #include #include +#ifndef DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR +#define DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR (1 << 4) +#endif + namespace NEO { Drm::Drm(std::unique_ptr &&hwDeviceIdIn, RootDeviceEnvironment &rootDeviceEnvironment) @@ -1449,6 +1454,8 @@ int changeBufferObjectBinding(Drm *drm, OsContext *osContext, uint32_t vmHandleI vmBind.offset = 0; vmBind.start = bo->peekAddress(); vmBind.userptr = bo->getUserptr(); + vmBind.sharedSystemUsmEnabled = drm->isSharedSystemAllocEnabled(); + vmBind.sharedSystemUsmBind = false; if (bo->getColourWithBind()) { vmBind.length = bo->getColourChunk(); @@ -1562,6 +1569,26 @@ int Drm::createDrmVirtualMemory(uint32_t &drmVmId) { if (ret == 0) { drmVmId = ctl.vmId; + if (isSharedSystemAllocEnabled()) { + VmBindParams vmBind{}; + vmBind.vmId = static_cast(ctl.vmId); + vmBind.flags = DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR; + vmBind.length = (0x1ull << ((NEO::CpuInfo::getInstance().getVirtualAddressSize()) - 1)); + vmBind.sharedSystemUsmEnabled = true; + vmBind.sharedSystemUsmBind = true; + VmBindExtUserFenceT vmBindExtUserFence{}; + ioctlHelper->fillVmBindExtUserFence(vmBindExtUserFence, + castToUint64(ioctlHelper->getPagingFenceAddress(0, nullptr)), + getNextFenceVal(0), + vmBind.extensions); + ioctlHelper->setVmBindUserFence(vmBind, vmBindExtUserFence); + + if (ioctlHelper->vmBind(vmBind)) { + setSharedSystemAllocEnable(false); + printDebugString(debugManager.flags.PrintDebugMessages.get(), stderr, + "INFO: Shared System USM capability not detected\n"); + } + } if (ctl.vmId == 0) { // 0 is reserved for invalid/unassigned ppgtt return -1; diff --git a/shared/source/os_interface/linux/drm_neo.h b/shared/source/os_interface/linux/drm_neo.h index 721be7bb40..4bfac6cb51 100644 --- a/shared/source/os_interface/linux/drm_neo.h +++ b/shared/source/os_interface/linux/drm_neo.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -151,6 +151,8 @@ class Drm : public DriverModel { void setDirectSubmissionActive(bool value) { this->directSubmissionActive = value; } bool isDirectSubmissionActive() const { return this->directSubmissionActive; } + MOCKABLE_VIRTUAL void setSharedSystemAllocEnable(bool value) { this->sharedSystemAllocEnable = value; } + MOCKABLE_VIRTUAL bool isSharedSystemAllocEnabled() const { return this->sharedSystemAllocEnable; } MOCKABLE_VIRTUAL bool isSetPairAvailable(); MOCKABLE_VIRTUAL bool getSetPairAvailable() { return setPairAvailable; } @@ -347,6 +349,7 @@ class Drm : public DriverModel { bool requirePerContextVM = false; bool bindAvailable = false; bool directSubmissionActive = false; + bool sharedSystemAllocEnable = false; bool setPairAvailable = false; bool chunkingAvailable = false; uint32_t chunkingMode = 0; diff --git a/shared/source/os_interface/linux/ioctl_helper.h b/shared/source/os_interface/linux/ioctl_helper.h index 7c810a3e4d..1626cb186f 100644 --- a/shared/source/os_interface/linux/ioctl_helper.h +++ b/shared/source/os_interface/linux/ioctl_helper.h @@ -75,6 +75,8 @@ struct VmBindParams { uint64_t userFence; uint64_t patIndex; uint64_t userptr; + bool sharedSystemUsmEnabled; + bool sharedSystemUsmBind; }; struct UuidRegisterResult { @@ -245,8 +247,7 @@ class IoctlHelperI915 : public IoctlHelper { public: using IoctlHelper::IoctlHelper; - static bool queryDeviceIdAndRevision(const Drm &drm); - + static bool queryDeviceIdAndRevision(Drm &drm); void fillExecObject(ExecObject &execObject, uint32_t handle, uint64_t gpuAddress, uint32_t drmContextId, bool bindInfo, bool isMarkedForCapture) override; void logExecObject(const ExecObject &execObject, std::stringstream &logger, size_t size) override; void fillExecBuffer(ExecBuffer &execBuffer, uintptr_t buffersPtr, uint32_t bufferCount, uint32_t startOffset, uint32_t size, uint64_t flags, uint32_t drmContextId) override; diff --git a/shared/source/os_interface/linux/ioctl_helper_i915.cpp b/shared/source/os_interface/linux/ioctl_helper_i915.cpp index 22e45a9ce3..91ee74d99a 100644 --- a/shared/source/os_interface/linux/ioctl_helper_i915.cpp +++ b/shared/source/os_interface/linux/ioctl_helper_i915.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -678,7 +678,7 @@ bool IoctlHelperI915::isPreemptionSupported() { return retVal == 0 && (schedulerCap & I915_SCHEDULER_CAP_PREEMPTION); } -bool IoctlHelperI915::queryDeviceIdAndRevision(const Drm &drm) { +bool IoctlHelperI915::queryDeviceIdAndRevision(Drm &drm) { HardwareInfo *hwInfo = drm.getRootDeviceEnvironment().getMutableHardwareInfo(); auto fileDescriptor = drm.getFileDescriptor(); @@ -707,4 +707,5 @@ bool IoctlHelperI915::queryDeviceIdAndRevision(const Drm &drm) { hwInfo->platform.usRevId = param; return true; } + } // namespace NEO diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp b/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp index 96346b3752..e042b6c0e5 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.cpp @@ -37,6 +37,14 @@ #define STRINGIFY_ME(X) return #X #define RETURN_ME(X) return X +#ifndef DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR +#define DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR (1 << 4) +#endif + +#ifndef DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR +#define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 1) +#endif + namespace NEO { const char *IoctlHelperXe::xeGetClassName(int className) { @@ -127,7 +135,7 @@ IoctlHelperXe::IoctlHelperXe(Drm &drmArg) : IoctlHelper(drmArg) { xeLog("IoctlHelperXe::IoctlHelperXe\n", ""); } -bool IoctlHelperXe::queryDeviceIdAndRevision(const Drm &drm) { +bool IoctlHelperXe::queryDeviceIdAndRevision(Drm &drm) { auto fileDescriptor = drm.getFileDescriptor(); drm_xe_device_query queryConfig = {}; @@ -153,6 +161,10 @@ bool IoctlHelperXe::queryDeviceIdAndRevision(const Drm &drm) { auto hwInfo = drm.getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->platform.usDeviceID = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff; hwInfo->platform.usRevId = static_cast((config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16) & 0xff); + + if ((debugManager.flags.EnableSharedSystemUsmSupport.get() != 0) && (config->info[DRM_XE_QUERY_CONFIG_FLAGS] & DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR)) { + drm.setSharedSystemAllocEnable(true); + } return true; } @@ -1352,22 +1364,25 @@ int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool isBind) { drm_xe_vm_bind bind = {}; bind.vm_id = vmBindParams.vmId; - bind.num_syncs = 1; + bind.num_binds = 1; bind.bind.range = vmBindParams.length; - bind.bind.addr = gmmHelper->decanonize(vmBindParams.start); bind.bind.obj_offset = vmBindParams.offset; bind.bind.pat_index = static_cast(vmBindParams.patIndex); bind.bind.extensions = vmBindParams.extensions; bind.bind.flags = static_cast(vmBindParams.flags); - UNRECOVERABLE_IF(vmBindParams.userFence == 0x0); drm_xe_sync sync[1] = {}; - + if (vmBindParams.sharedSystemUsmBind == true) { + bind.bind.addr = 0; + } else { + bind.bind.addr = gmmHelper->decanonize(vmBindParams.start); + } + bind.num_syncs = 1; + UNRECOVERABLE_IF(vmBindParams.userFence == 0x0); auto xeBindExtUserFence = reinterpret_cast(vmBindParams.userFence); UNRECOVERABLE_IF(xeBindExtUserFence->tag != UserFenceExtension::tagValue); - sync[0].type = DRM_XE_SYNC_TYPE_USER_FENCE; sync[0].flags = DRM_XE_SYNC_FLAG_SIGNAL; sync[0].addr = xeBindExtUserFence->addr; @@ -1383,11 +1398,17 @@ int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool isBind) { bind.bind.obj_offset = userptr; } } else { - bind.bind.op = DRM_XE_VM_BIND_OP_UNMAP; - bind.bind.obj = 0; - if (userptr) { - bind.bind.obj_offset = userptr; + if (vmBindParams.sharedSystemUsmEnabled) { + // Use of MAP on unbind required for restoring the address space to the system allocator + bind.bind.op = DRM_XE_VM_BIND_OP_MAP; + bind.bind.flags |= DRM_XE_VM_BIND_FLAG_SYSTEM_ALLOCATOR; + } else { + bind.bind.op = DRM_XE_VM_BIND_OP_UNMAP; + if (userptr) { + bind.bind.obj_offset = userptr; + } } + bind.bind.obj = 0; } ret = IoctlHelper::ioctl(DrmIoctl::gemVmBind, &bind); @@ -1738,4 +1759,5 @@ void IoctlHelperXe::querySupportedFeatures() { }; supportedFeatures.flags.pageFault = checkVmCreateFlagsSupport(DRM_XE_VM_CREATE_FLAG_LR_MODE | DRM_XE_VM_CREATE_FLAG_FAULT_MODE); }; + } // namespace NEO diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h index 06372870dd..25df1c230d 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h @@ -36,8 +36,7 @@ class IoctlHelperXe : public IoctlHelper { public: using IoctlHelper::IoctlHelper; static std::unique_ptr create(Drm &drmArg); - static bool queryDeviceIdAndRevision(const Drm &drm); - + static bool queryDeviceIdAndRevision(Drm &drm); IoctlHelperXe(Drm &drmArg); ~IoctlHelperXe() override; int ioctl(DrmIoctl request, void *arg) override; diff --git a/shared/test/common/libult/linux/drm_mock.h b/shared/test/common/libult/linux/drm_mock.h index 390dee24b0..301f59ba8b 100644 --- a/shared/test/common/libult/linux/drm_mock.h +++ b/shared/test/common/libult/linux/drm_mock.h @@ -38,6 +38,7 @@ class DrmMock : public Drm { using Drm::generateUUID; using Drm::getQueueSliceCount; using Drm::ioctlHelper; + using Drm::isSharedSystemAllocEnabled; using Drm::l3CacheInfo; using Drm::memoryInfo; using Drm::memoryInfoQueried; @@ -51,6 +52,7 @@ class DrmMock : public Drm { using Drm::queryDeviceIdAndRevision; using Drm::requirePerContextVM; using Drm::setPairAvailable; + using Drm::setSharedSystemAllocEnable; using Drm::setupIoctlHelper; using Drm::sliceCountChangeSupported; using Drm::systemInfo; diff --git a/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp b/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp index 99a43977b2..b2d42828de 100644 --- a/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/xe/ioctl_helper_xe_tests.cpp @@ -18,6 +18,11 @@ #include "shared/test/common/os_interface/linux/xe/mock_ioctl_helper_xe.h" #include "shared/test/common/os_interface/linux/xe/xe_config_fixture.h" #include "shared/test/common/test_macros/test.h" + +#ifndef DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR +#define DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR (1 << 1) +#endif + using namespace NEO; using IoctlHelperXeTest = Test; @@ -2596,3 +2601,79 @@ TEST_F(IoctlHelperXeTest, whenQueryDeviceIdAndRevisionThenProperValuesAreSet) { EXPECT_EQ(mockDeviceId, hwInfo->platform.usDeviceID); EXPECT_EQ(mockRevisionId, hwInfo->platform.usRevId); } + +TEST_F(IoctlHelperXeTest, whenQueryDeviceIdAndRevisionConfigFlagHasGpuAddrMirrorSetThenSharedSystemAllocEnableTrue) { + MockExecutionEnvironment executionEnvironment{}; + std::unique_ptr drm{Drm::create(std::make_unique(0, ""), *executionEnvironment.rootDeviceEnvironments[0])}; + DebugManagerStateRestore restore; + mockIoctl = [](int fileDescriptor, unsigned long int request, void *arg) -> int { + if (request == DRM_IOCTL_XE_DEVICE_QUERY) { + struct drm_xe_device_query *deviceQuery = static_cast(arg); + if (deviceQuery->query == DRM_XE_DEVICE_QUERY_CONFIG) { + if (deviceQuery->data) { + struct drm_xe_query_config *config = reinterpret_cast(deviceQuery->data); + config->num_params = DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID + 1; + config->info[DRM_XE_QUERY_CONFIG_FLAGS] = DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR; + } else { + deviceQuery->size = (DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID + 1) * sizeof(uint64_t); + } + return 0; + } + } + return -1; + }; + + EXPECT_TRUE(IoctlHelperXe::queryDeviceIdAndRevision(*drm)); + EXPECT_TRUE(drm->isSharedSystemAllocEnabled()); +} + +TEST_F(IoctlHelperXeTest, whenQueryDeviceIdAndRevisionAndConfigFlagHasGpuAddrMirrorClearThenSharedSystemAllocEnableFalse) { + MockExecutionEnvironment executionEnvironment{}; + std::unique_ptr drm{Drm::create(std::make_unique(0, ""), *executionEnvironment.rootDeviceEnvironments[0])}; + DebugManagerStateRestore restore; + mockIoctl = [](int fileDescriptor, unsigned long int request, void *arg) -> int { + if (request == DRM_IOCTL_XE_DEVICE_QUERY) { + struct drm_xe_device_query *deviceQuery = static_cast(arg); + if (deviceQuery->query == DRM_XE_DEVICE_QUERY_CONFIG) { + if (deviceQuery->data) { + struct drm_xe_query_config *config = reinterpret_cast(deviceQuery->data); + config->num_params = DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID + 1; + config->info[DRM_XE_QUERY_CONFIG_FLAGS] = 0; + } else { + deviceQuery->size = (DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID + 1) * sizeof(uint64_t); + } + return 0; + } + } + return -1; + }; + + EXPECT_TRUE(IoctlHelperXe::queryDeviceIdAndRevision(*drm)); + EXPECT_FALSE(drm->isSharedSystemAllocEnabled()); +} + +TEST_F(IoctlHelperXeTest, whenQueryDeviceIdAndRevisionAndSharedSystemUsmSupportDebugFlagClearThenSharedSystemAllocEnableFalse) { + MockExecutionEnvironment executionEnvironment{}; + std::unique_ptr drm{Drm::create(std::make_unique(0, ""), *executionEnvironment.rootDeviceEnvironments[0])}; + DebugManagerStateRestore restore; + debugManager.flags.EnableSharedSystemUsmSupport.set(0); + mockIoctl = [](int fileDescriptor, unsigned long int request, void *arg) -> int { + if (request == DRM_IOCTL_XE_DEVICE_QUERY) { + struct drm_xe_device_query *deviceQuery = static_cast(arg); + if (deviceQuery->query == DRM_XE_DEVICE_QUERY_CONFIG) { + if (deviceQuery->data) { + struct drm_xe_query_config *config = reinterpret_cast(deviceQuery->data); + config->num_params = DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID + 1; + config->info[DRM_XE_QUERY_CONFIG_FLAGS] = DRM_XE_QUERY_CONFIG_FLAG_HAS_CPU_ADDR_MIRROR; + } else { + deviceQuery->size = (DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID + 1) * sizeof(uint64_t); + } + return 0; + } + } + return -1; + }; + + EXPECT_TRUE(IoctlHelperXe::queryDeviceIdAndRevision(*drm)); + EXPECT_FALSE(drm->isSharedSystemAllocEnabled()); +}