fix: don't create fake memory regions in xe ioctl helper

populate memory info based on mem usage and gts info
propagate error from xeWaitUserFence function

Related-To: NEO-7931

Co-authored-by: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2023-05-04 10:13:08 +00:00
committed by Compute-Runtime-Automation
parent b5300b253d
commit 87aab0bf6e
4 changed files with 218 additions and 83 deletions

View File

@ -145,7 +145,7 @@ class IoctlHelper {
virtual void *pciBarrierMmap() { return nullptr; };
uint32_t getFlagsForPrimeHandleToFd() const;
std::unique_ptr<MemoryInfo> createMemoryInfo();
virtual std::unique_ptr<MemoryInfo> createMemoryInfo();
virtual std::unique_ptr<EngineInfo> createEngineInfo(bool isSysmanEnabled);
protected:

View File

@ -20,6 +20,7 @@
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/linux/drm_neo.h"
#include "shared/source/os_interface/linux/engine_info.h"
#include "shared/source/os_interface/linux/memory_info.h"
#include "shared/source/os_interface/linux/os_context_linux.h"
#include "drm/i915_drm_prelim.h"
@ -60,9 +61,6 @@ int IoctlHelperXe::xeGetQuery(Query *data) {
QueryItem *queryItem = (QueryItem *)data->itemsPtr;
std::vector<uint8_t> *queryData = nullptr;
switch (queryItem->queryId) {
case static_cast<int>(DrmParam::QueryMemoryRegions):
queryData = &memQueryFakei915;
break;
case static_cast<int>(DrmParam::QueryHwconfigTable):
queryData = &hwconfigFakei915;
break;
@ -197,59 +195,6 @@ bool IoctlHelperXe::initialize() {
hasVram = config->info[XE_QUERY_CONFIG_FLAGS] & XE_QUERY_CONFIG_FLAGS_HAS_VRAM ? 1 : 0;
addressWidth = static_cast<uint32_t>(config->info[XE_QUERY_CONFIG_VA_BITS]);
memset(&queryConfig, 0, sizeof(queryConfig));
queryConfig.query = DRM_XE_DEVICE_QUERY_GTS;
IoctlHelper::ioctl(DrmIoctl::Query, &queryConfig);
auto dataGts = std::vector<uint8_t>(sizeof(drm_xe_query_config) + sizeof(uint64_t) * queryConfig.size, 0);
struct drm_xe_query_gts *gts = reinterpret_cast<struct drm_xe_query_gts *>(dataGts.data());
queryConfig.data = castToUint64(gts);
IoctlHelper::ioctl(DrmIoctl::Query, &queryConfig);
for (uint32_t i = 0; i < gts->num_gt; i++) {
xeMemoryRegions |= gts->gts[i].native_mem_regions | gts->gts[i].slow_mem_regions;
xeTimestampFrequency = gts->gts[i].clock_freq;
}
xeLog("xeMemoryRegions 0x%llx\n", xeMemoryRegions);
memset(&queryConfig, 0, sizeof(queryConfig));
queryConfig.query = DRM_XE_DEVICE_QUERY_MEM_USAGE;
IoctlHelper::ioctl(DrmIoctl::Query, &queryConfig);
auto dataMem = std::vector<uint8_t>(sizeof(drm_xe_query_config) + sizeof(uint64_t) * queryConfig.size, 0);
struct drm_xe_query_mem_usage *configMem = reinterpret_cast<struct drm_xe_query_mem_usage *>(dataMem.data());
queryConfig.data = castToUint64(configMem);
IoctlHelper::ioctl(DrmIoctl::Query, &queryConfig);
memQueryFakei915.resize(sizeof(drm_i915_query_memory_regions) + (configMem->num_regions * sizeof(drm_i915_memory_region_info)));
struct drm_i915_query_memory_regions *i915MemQuery = reinterpret_cast<struct drm_i915_query_memory_regions *>(memQueryFakei915.data());
i915MemQuery->num_regions = configMem->num_regions;
for (uint32_t i = 0; i < configMem->num_regions; i++) {
const char *memName = NULL;
uint16_t memClass = 0;
uint16_t memInst = configMem->regions[i].instance;
switch (configMem->regions[i].mem_class) {
case XE_MEM_REGION_CLASS_SYSMEM:
memName = "SYSMEM";
memClass = getDrmParamValue(DrmParam::MemoryClassSystem);
break;
case XE_MEM_REGION_CLASS_VRAM:
memName = "VRAM";
memClass = getDrmParamValue(DrmParam::MemoryClassDevice);
memInst--;
break;
default:
xeLog("Unhandled Xe memory class", "");
UNRECOVERABLE_IF(true);
break;
}
i915MemQuery->regions[i].region.memory_class = memClass;
i915MemQuery->regions[i].region.memory_instance = memInst;
i915MemQuery->regions[i].probed_size = configMem->regions[i].total_size;
i915MemQuery->regions[i].unallocated_size = configMem->regions[i].total_size - configMem->regions[i].used;
xeLog(" %s c=0x%x i=%d T=%llx U=0x%llx / i915: %d %d\n",
memName, configMem->regions[i].mem_class, configMem->regions[i].instance,
configMem->regions[i].total_size, configMem->regions[i].used, memClass, memInst);
}
memset(&queryConfig, 0, sizeof(queryConfig));
queryConfig.query = DRM_XE_DEVICE_QUERY_HWCONFIG;
IoctlHelper::ioctl(DrmIoctl::Query, &queryConfig);
@ -340,6 +285,11 @@ std::unique_ptr<EngineInfo> IoctlHelperXe::createEngineInfo(bool isSysmanEnabled
sizeof(struct drm_xe_engine_class_instance);
xeLog("numberHwEngines=%d\n", numberHwEngines);
if (enginesData.empty()) {
return {};
}
auto queriedEngines = reinterpret_cast<struct drm_xe_engine_class_instance *>(enginesData.data());
StackVec<std::vector<EngineClassInstance>, 2> enginesPerTile{};
@ -368,6 +318,52 @@ std::unique_ptr<EngineInfo> IoctlHelperXe::createEngineInfo(bool isSysmanEnabled
return std::make_unique<EngineInfo>(&drm, enginesPerTile);
}
// Converts one region entry of the Xe mem-usage query into a MemoryRegion:
// class and instance are copied as-is, probedSize is the region's total size
// and unallocatedSize is the total size minus the used amount.
inline MemoryRegion createMemoryRegionFromXeMemRegion(const drm_xe_query_mem_usage::drm_xe_query_mem_region &xeMemRegion) {
    const auto freeSize = xeMemRegion.total_size - xeMemRegion.used;

    MemoryRegion converted{};
    converted.region.memoryClass = xeMemRegion.mem_class;
    converted.region.memoryInstance = xeMemRegion.instance;
    converted.probedSize = xeMemRegion.total_size;
    converted.unallocatedSize = freeSize;
    return converted;
}
std::unique_ptr<MemoryInfo> IoctlHelperXe::createMemoryInfo() {
auto memUsageData = queryData(DRM_XE_DEVICE_QUERY_MEM_USAGE);
auto gtsData = queryData(DRM_XE_DEVICE_QUERY_GTS);
if (memUsageData.empty() || gtsData.empty()) {
return {};
}
MemoryInfo::RegionContainer regionsContainer{};
auto xeMemUsageData = reinterpret_cast<drm_xe_query_mem_usage *>(memUsageData.data());
auto xeGtsData = reinterpret_cast<drm_xe_query_gts *>(gtsData.data());
std::array<drm_xe_query_mem_usage::drm_xe_query_mem_region *, 64> memoryRegionInstances{};
for (auto i = 0u; i < xeMemUsageData->num_regions; i++) {
auto &region = xeMemUsageData->regions[i];
memoryRegionInstances[region.instance] = &region;
if (region.mem_class == XE_MEM_REGION_CLASS_SYSMEM) {
regionsContainer.push_back(createMemoryRegionFromXeMemRegion(region));
}
}
if (regionsContainer.empty()) {
return {};
}
for (auto i = 0u; i < xeGtsData->num_gt; i++) {
uint64_t nativeMemRegions = xeGtsData->gts[i].native_mem_regions;
auto regionIndex = Math::log2(nativeMemRegions);
UNRECOVERABLE_IF(!memoryRegionInstances[regionIndex]);
regionsContainer.push_back(createMemoryRegionFromXeMemRegion(*memoryRegionInstances[regionIndex]));
xeTimestampFrequency = xeGtsData->gts[i].clock_freq;
}
return std::make_unique<MemoryInfo>(regionsContainer, drm);
}
int IoctlHelperXe::createGemExt(const MemRegionsVec &memClassInstances, size_t allocSize, uint32_t &handle, std::optional<uint32_t> vmId, int32_t pairHandle) {
struct drm_xe_gem_create create = {};
uint32_t regionsSize = static_cast<uint32_t>(memClassInstances.size());
@ -383,18 +379,12 @@ int IoctlHelperXe::createGemExt(const MemRegionsVec &memClassInstances, size_t a
create.size = allocSize;
MemoryClassInstance mem = memClassInstances[regionsSize - 1];
switch (mem.memoryClass) {
case XE_MEM_REGION_CLASS_SYSMEM:
create.flags = xeMemoryRegions & 0x1;
break;
case XE_MEM_REGION_CLASS_VRAM:
create.flags = xeMemoryRegions & (0x2 << mem.memoryInstance);
break;
default:
xeLog(" wrong memory region: %d\n", mem.memoryClass);
UNRECOVERABLE_IF(true);
break;
std::bitset<32> memoryInstances{};
for (const auto &memoryClassInstance : memClassInstances) {
memoryInstances.set(memoryClassInstance.memoryInstance);
}
create.flags = static_cast<uint32_t>(memoryInstances.to_ulong());
auto ret = IoctlHelper::ioctl(DrmIoctl::GemCreate, &create);
handle = create.handle;
@ -426,10 +416,9 @@ CacheRegion IoctlHelperXe::closFree(CacheRegion closIndex) {
return CacheRegion::None;
}
void IoctlHelperXe::xeWaitUserFence(uint64_t mask, uint16_t op, uint64_t addr, uint64_t value,
struct drm_xe_engine_class_instance *eci,
int64_t timeout) {
int ret;
int IoctlHelperXe::xeWaitUserFence(uint64_t mask, uint16_t op, uint64_t addr, uint64_t value,
struct drm_xe_engine_class_instance *eci,
int64_t timeout) {
struct drm_xe_wait_user_fence wait = {};
wait.addr = addr;
wait.op = op;
@ -439,8 +428,11 @@ void IoctlHelperXe::xeWaitUserFence(uint64_t mask, uint16_t op, uint64_t addr, u
wait.timeout = timeout;
wait.num_engines = eci ? 1 : 0;
wait.instances = eci ? castToUint64(eci) : 0;
ret = IoctlHelper::ioctl(DrmIoctl::GemWaitUserFence, &wait);
UNRECOVERABLE_IF(ret);
auto retVal = IoctlHelper::ioctl(DrmIoctl::GemWaitUserFence, &wait);
xeLog(" -> IoctlHelperXe::%s a=0x%llx v=0x%llx engine=[0x%x, 0x%x] T=0x%llx F=0x%x retVal=0x%x\n", __FUNCTION__, addr, value,
eci ? eci->engine_class : -1, eci ? eci->engine_instance : -1,
timeout, wait.flags, retVal);
return retVal;
}
int IoctlHelperXe::waitUserFence(uint32_t ctxId, uint64_t address,
@ -466,7 +458,7 @@ int IoctlHelperXe::waitUserFence(uint32_t ctxId, uint64_t address,
timeout = TimeoutControls::maxTimeout;
}
if (address) {
xeWaitUserFence(mask, DRM_XE_UFENCE_WAIT_GTE, address, value, NULL, timeout);
return xeWaitUserFence(mask, DRM_XE_UFENCE_WAIT_GTE, address, value, NULL, timeout);
}
return 0;
}
@ -1237,9 +1229,9 @@ int IoctlHelperXe::xeVmBind(const VmBindParams &vmBindParams, bool bindOp) {
ret = IoctlHelper::ioctl(DrmIoctl::GemVmBind, &bind);
if (!bindOp) {
xeWaitUserFence(DRM_XE_UFENCE_WAIT_U64, DRM_XE_UFENCE_WAIT_EQ,
sync[0].addr,
sync[0].timeline_value, NULL, XE_ONE_SEC);
return xeWaitUserFence(DRM_XE_UFENCE_WAIT_U64, DRM_XE_UFENCE_WAIT_EQ,
sync[0].addr,
sync[0].timeline_value, NULL, XE_ONE_SEC);
}
}

View File

@ -95,13 +95,14 @@ class IoctlHelperXe : public IoctlHelper {
bool getFabricLatency(uint32_t fabricId, uint32_t &latency, uint32_t &bandwidth) override;
bool isWaitBeforeBindRequired(bool bind) const override;
std::unique_ptr<EngineInfo> createEngineInfo(bool isSysmanEnabled) override;
std::unique_ptr<MemoryInfo> createMemoryInfo() override;
std::vector<uint8_t> xeRebuildi915Topology(std::vector<uint8_t> *geomDss, std::vector<uint8_t> *computeDss, std::vector<uint8_t> *euDss);
private:
template <typename... XeLogArgs>
void xeLog(XeLogArgs &&...args) const;
void xeWaitUserFence(uint64_t mask, uint16_t op, uint64_t addr, uint64_t value, struct drm_xe_engine_class_instance *eci, int64_t timeout);
int xeWaitUserFence(uint64_t mask, uint16_t op, uint64_t addr, uint64_t value, struct drm_xe_engine_class_instance *eci, int64_t timeout);
int xeVmBind(const VmBindParams &vmBindParams, bool bindOp);
uint32_t xeSyncObjCreate(uint32_t flags);
bool xeSyncObjWait(uint32_t *handles, uint32_t count, uint64_t absTimeoutNsec, uint32_t flags, uint32_t *firstSignaled);
@ -129,9 +130,7 @@ class IoctlHelperXe : public IoctlHelper {
std::mutex xeLock;
std::vector<BindInfo> bindInfo;
int instance = 0;
uint64_t xeMemoryRegions = 0;
uint32_t xeTimestampFrequency = 0;
std::vector<uint8_t> memQueryFakei915;
std::vector<uint8_t> hwconfigFakei915;
std::vector<uint8_t> topologyFakei915;
std::vector<drm_xe_engine_class_instance> contextParamEngine;

View File

@ -9,6 +9,7 @@
#include "shared/source/os_interface/linux/engine_info.h"
#include "shared/source/os_interface/linux/i915_prelim.h"
#include "shared/source/os_interface/linux/ioctl_helper.h"
#include "shared/source/os_interface/linux/memory_info.h"
#include "shared/source/os_interface/linux/xe/ioctl_helper_xe.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
@ -29,6 +30,7 @@ struct MockIoctlHelperXe : IoctlHelperXe {
using IoctlHelperXe::xeGetBindOpName;
using IoctlHelperXe::xeGetClassName;
using IoctlHelperXe::xeGetengineClassName;
using IoctlHelperXe::xeTimestampFrequency;
};
TEST(IoctlHelperXeTest, givenXeDrmVersionsWhenGettingIoctlHelperThenValidIoctlHelperIsReturned) {
@ -454,7 +456,58 @@ inline constexpr int testValueGemCreate = 0x8273;
class DrmMockXe : public DrmMockCustom {
public:
DrmMockXe(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMockCustom(rootDeviceEnvironment){};
DrmMockXe(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMockCustom(rootDeviceEnvironment) {
auto xeQueryMemUsage = reinterpret_cast<drm_xe_query_mem_usage *>(queryMemUsage);
xeQueryMemUsage->num_regions = 3;
xeQueryMemUsage->regions[0] = {
XE_MEM_REGION_CLASS_VRAM, // class
1, // instance
0, // padding
MemoryConstants::pageSize, // min page size
MemoryConstants::pageSize, // max page size
2 * MemoryConstants::gigaByte, // total size
MemoryConstants::megaByte // used size
};
xeQueryMemUsage->regions[1] = {
XE_MEM_REGION_CLASS_SYSMEM, // class
0, // instance
0, // padding
MemoryConstants::pageSize, // min page size
MemoryConstants::pageSize, // max page size
MemoryConstants::gigaByte, // total size
MemoryConstants::kiloByte // used size
};
xeQueryMemUsage->regions[2] = {
XE_MEM_REGION_CLASS_VRAM, // class
2, // instance
0, // padding
MemoryConstants::pageSize, // min page size
MemoryConstants::pageSize, // max page size
4 * MemoryConstants::gigaByte, // total size
MemoryConstants::gigaByte // used size
};
auto xeQueryGts = reinterpret_cast<drm_xe_query_gts *>(queryGts);
xeQueryGts->num_gt = 2;
xeQueryGts->gts[0] = {
XE_QUERY_GT_TYPE_MAIN, // type
0, // instance
12500000, // clock freq
0, // features
0b100, // native mem regions
0x011, // slow mem regions
0 // inaccessible mem regions
};
xeQueryGts->gts[1] = {
XE_QUERY_GT_TYPE_REMOTE, // type
1, // instance
12500000, // clock freq
0, // features
0b010, // native mem regions
0x101, // slow mem regions
0 // inaccessible mem regions
};
}
void testMode(int f, int a = 0) {
forceIoctlAnswer = f;
@ -525,7 +578,19 @@ class DrmMockXe : public DrmMockCustom {
}
deviceQuery->size = sizeof(queryEngines);
break;
}
case DRM_XE_DEVICE_QUERY_MEM_USAGE:
if (deviceQuery->data) {
memcpy_s(reinterpret_cast<void *>(deviceQuery->data), deviceQuery->size, queryMemUsage, sizeof(queryMemUsage));
}
deviceQuery->size = sizeof(queryMemUsage);
break;
case DRM_XE_DEVICE_QUERY_GTS:
if (deviceQuery->data) {
memcpy_s(reinterpret_cast<void *>(deviceQuery->data), deviceQuery->size, queryGts, sizeof(queryGts));
}
deviceQuery->size = sizeof(queryGts);
break;
};
ret = 0;
} break;
case DrmIoctl::GemContextSetparam:
@ -547,6 +612,9 @@ class DrmMockXe : public DrmMockCustom {
{DRM_XE_ENGINE_CLASS_COMPUTE, 6, 1},
{DRM_XE_ENGINE_CLASS_VIDEO_DECODE, 7, 1},
{DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE, 8, 0}};
uint64_t queryMemUsage[37]{}; // 1 qword for num regions and 12 qwords per region
uint64_t queryGts[27]{}; // 1 qword for num gts and 13 qwords per gt
};
TEST(IoctlHelperXeTest, whenCallingIoctlThenProperValueIsReturned) {
@ -793,3 +861,79 @@ TEST(IoctlHelperXeTest, whenCreatingEngineInfoThenProperEnginesAreDiscovered) {
}
}
}
// Verifies that createMemoryInfo() turns the mock's MEM_USAGE and GTS query
// data into three regions: the system memory region first, then one region per
// GT chosen by that GT's native memory region mask, and that the timestamp
// frequency is taken from the GT data.
TEST(IoctlHelperXeTest, whenCreatingMemoryInfoThenProperMemoryBanksAreDiscovered) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableLocalMemory.set(1);
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    DrmMockXe drm{*executionEnvironment->rootDeviceEnvironments[0]};
    auto xeIoctlHelper = std::make_unique<MockIoctlHelperXe>(drm);
    auto memoryInfo = xeIoctlHelper->createMemoryInfo();
    // Fatal assertion: everything below dereferences memoryInfo.
    ASSERT_NE(nullptr, memoryInfo);

    auto memoryClassInstance0 = memoryInfo->getMemoryRegionClassAndInstance(0, *defaultHwInfo);
    EXPECT_EQ(static_cast<uint16_t>(XE_MEM_REGION_CLASS_SYSMEM), memoryClassInstance0.memoryClass);
    EXPECT_EQ(0u, memoryClassInstance0.memoryInstance);
    EXPECT_EQ(MemoryConstants::gigaByte, memoryInfo->getMemoryRegionSize(0));

    auto memoryClassInstance1 = memoryInfo->getMemoryRegionClassAndInstance(0b01, *defaultHwInfo);
    EXPECT_EQ(static_cast<uint16_t>(XE_MEM_REGION_CLASS_VRAM), memoryClassInstance1.memoryClass);
    EXPECT_EQ(2u, memoryClassInstance1.memoryInstance);
    EXPECT_EQ(4 * MemoryConstants::gigaByte, memoryInfo->getMemoryRegionSize(0b01));

    auto memoryClassInstance2 = memoryInfo->getMemoryRegionClassAndInstance(0b10, *defaultHwInfo);
    EXPECT_EQ(static_cast<uint16_t>(XE_MEM_REGION_CLASS_VRAM), memoryClassInstance2.memoryClass);
    EXPECT_EQ(1u, memoryClassInstance2.memoryInstance);
    EXPECT_EQ(2 * MemoryConstants::gigaByte, memoryInfo->getMemoryRegionSize(0b10));

    auto &memoryRegions = memoryInfo->getDrmRegionInfos();
    // Fatal assertion: the checks below index elements 0..2.
    ASSERT_EQ(3u, memoryRegions.size());

    EXPECT_EQ(0u, memoryRegions[0].region.memoryInstance);
    EXPECT_EQ(MemoryConstants::gigaByte, memoryRegions[0].probedSize);
    EXPECT_EQ(MemoryConstants::gigaByte - MemoryConstants::kiloByte, memoryRegions[0].unallocatedSize);

    EXPECT_EQ(2u, memoryRegions[1].region.memoryInstance);
    EXPECT_EQ(4 * MemoryConstants::gigaByte, memoryRegions[1].probedSize);
    EXPECT_EQ(3 * MemoryConstants::gigaByte, memoryRegions[1].unallocatedSize);

    EXPECT_EQ(1u, memoryRegions[2].region.memoryInstance);
    EXPECT_EQ(2 * MemoryConstants::gigaByte, memoryRegions[2].probedSize);
    EXPECT_EQ(2 * MemoryConstants::gigaByte - MemoryConstants::megaByte, memoryRegions[2].unallocatedSize);

    EXPECT_EQ(12500000u, xeIoctlHelper->xeTimestampFrequency);
}
// With the mock switched into testMode(1, -1) (a forced ioctl answer; per the
// test name this models an ioctl failure), createMemoryInfo() must not produce
// a MemoryInfo object.
TEST(IoctlHelperXeTest, givenIoctlFailureWhenCreatingMemoryInfoThenNoMemoryBanksAreDiscovered) {
    DebugManagerStateRestore restorer;
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    DrmMockXe drmMock{rootDeviceEnvironment};
    auto ioctlHelper = std::make_unique<MockIoctlHelperXe>(drmMock);

    drmMock.testMode(1, -1);

    auto createdMemoryInfo = ioctlHelper->createMemoryInfo();
    EXPECT_EQ(nullptr, createdMemoryInfo);
}
// When the mem-usage query reports zero regions there is no system memory
// region to register, so createMemoryInfo() must return an empty pointer.
TEST(IoctlHelperXeTest, givenNoMemoryRegionsWhenCreatingMemoryInfoThenMemoryInfoIsNotCreated) {
    DebugManagerStateRestore restorer;
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    DrmMockXe drmMock{rootDeviceEnvironment};
    auto ioctlHelper = std::make_unique<MockIoctlHelperXe>(drmMock);

    // Overwrite the region count in the mock's canned query answer.
    reinterpret_cast<drm_xe_query_mem_usage *>(drmMock.queryMemUsage)->num_regions = 0u;

    auto createdMemoryInfo = ioctlHelper->createMemoryInfo();
    EXPECT_EQ(nullptr, createdMemoryInfo);
}
// With the mock switched into testMode(1, -1) (a forced ioctl answer; per the
// test name this models an ioctl failure), createEngineInfo() must not produce
// an EngineInfo object.
TEST(IoctlHelperXeTest, givenIoctlFailureWhenCreatingEngineInfoThenNoEnginesAreDiscovered) {
    DebugManagerStateRestore restorer;
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    DrmMockXe drmMock{rootDeviceEnvironment};
    auto ioctlHelper = std::make_unique<MockIoctlHelperXe>(drmMock);

    drmMock.testMode(1, -1);

    auto createdEngineInfo = ioctlHelper->createEngineInfo(true);
    EXPECT_EQ(nullptr, createdEngineInfo);
}