Files
compute-runtime/shared/source/os_interface/linux/drm_buffer_object.cpp
Lukasz Jobczyk 5bcf500c13 fix: Extend ULLS light mutex range
If some operations require ULLS light stop, execute such operations
under mutex in pair with ULLS stop to ensure no other thread will start
ULLS.

Related-To: NEO-14406, NEO-13922

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
2025-04-07 17:35:04 +02:00

373 lines
16 KiB
C++

/*
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/linux/drm_buffer_object.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/task_count_helper.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/os_interface/linux/drm_memory_manager.h"
#include "shared/source/os_interface/linux/drm_memory_operations_handler.h"
#include "shared/source/os_interface/linux/drm_neo.h"
#include "shared/source/os_interface/linux/drm_wrappers.h"
#include "shared/source/os_interface/linux/ioctl_helper.h"
#include "shared/source/os_interface/linux/os_context_linux.h"
#include "shared/source/os_interface/os_context.h"
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
namespace NEO {
// Creates another wrapper for the same BO handle that holds strong ownership.
// The shared control block is allocated lazily on the first acquire call
// (initialised with {1, 0} - presumably one strong and zero weak references; confirm against ControlBlock).
// The strong reference counter is incremented under the control block mutex.
BufferObjectHandleWrapper BufferObjectHandleWrapper::acquireSharedOwnership() {
    if (!controlBlock) {
        controlBlock = new ControlBlock{1, 0};
    }

    std::lock_guard guard{controlBlock->blockMutex};
    ++controlBlock->refCount;

    return BufferObjectHandleWrapper{boHandle, rootDeviceIndex, Ownership::strong, controlBlock};
}
// Creates another wrapper for the same BO handle that holds weak ownership.
// The shared control block is allocated lazily on the first acquire call
// (initialised with {1, 0} - presumably one strong and zero weak references; confirm against ControlBlock).
// The weak reference counter is incremented under the control block mutex.
BufferObjectHandleWrapper BufferObjectHandleWrapper::acquireWeakOwnership() {
    if (!controlBlock) {
        controlBlock = new ControlBlock{1, 0};
    }

    std::lock_guard guard{controlBlock->blockMutex};
    ++controlBlock->weakRefCount;

    return BufferObjectHandleWrapper{boHandle, rootDeviceIndex, Ownership::weak, controlBlock};
}
// Drops this wrapper's reference (strong or weak, depending on its ownership kind)
// and frees the shared control block once both counters have reached zero.
BufferObjectHandleWrapper::~BufferObjectHandleWrapper() {
    if (!controlBlock) {
        // No control block was ever attached, so there is nothing to release.
        return;
    }

    std::unique_lock guard{controlBlock->blockMutex};

    if (ownership != Ownership::strong) {
        --controlBlock->weakRefCount;
    } else {
        --controlBlock->refCount;
    }

    const bool lastReferenceGone = (controlBlock->refCount == 0) && (controlBlock->weakRefCount == 0);
    if (!lastReferenceGone) {
        return;
    }

    // The mutex lives inside the control block, so it has to be released before the block is deleted.
    guard.unlock();
    delete controlBlock;
    controlBlock = nullptr;
}
// Tells whether the BO handle may be closed by this wrapper.
// Returns true when no control block exists, or when the strong reference count equals exactly one.
bool BufferObjectHandleWrapper::canCloseBoHandle() {
    if (controlBlock != nullptr) {
        std::lock_guard guard{controlBlock->blockMutex};
        return controlBlock->refCount == 1;
    }
    return true;
}
// Convenience constructor: wraps the raw integer handle in a BufferObjectHandleWrapper
// and delegates to the constructor that takes the wrapper.
BufferObject::BufferObject(uint32_t rootDeviceIndex,
                           Drm *drm,
                           uint64_t patIndex,
                           int handle,
                           size_t size,
                           size_t maxOsContextCount)
    : BufferObject(rootDeviceIndex,
                   drm,
                   patIndex,
                   BufferObjectHandleWrapper{handle, rootDeviceIndex},
                   size,
                   maxOsContextCount) {
}
// Main constructor. Takes over the handle wrapper, starts with a reference count of one and
// queries the Drm object for the default (non-tiled) tiling mode, whether per-context VMs are
// required and whether page faults are supported.
// bindInfo gets one entry per OS context when per-context VMs are used, otherwise a single entry;
// every entry starts with all bind flags cleared.
BufferObject::BufferObject(uint32_t rootDeviceIndex, Drm *drm, uint64_t patIndex, BufferObjectHandleWrapper &&handle, size_t size, size_t maxOsContextCount)
    : drm(drm), handle(std::move(handle)), size(size), refCount(1), rootDeviceIndex(rootDeviceIndex) {
    this->tilingMode = drm->getIoctlHelper()->getDrmParamValue(DrmParam::tilingNone);
    this->lockedAddress = nullptr;
    this->patIndex = patIndex;

    this->perContextVmsUsed = drm->isPerContextVMRequired();
    this->requiresExplicitResidency = drm->hasPageFaultSupport();

    const size_t bindInfoEntries = this->perContextVmsUsed ? maxOsContextCount : size_t{1};
    this->bindInfo.resize(bindInfoEntries);
    for (auto &perContextBindings : this->bindInfo) {
        perContextBindings.fill(false);
    }
}
uint32_t BufferObject::getRefCount() const {
return this->refCount.load();
}
void BufferObject::setAddress(uint64_t address) {
auto gmmHelper = drm->getRootDeviceEnvironment().getGmmHelper();
this->gpuAddress = gmmHelper->canonize(address);
}
// Closes the GEM handle owned by this buffer object.
//
// When the handle wrapper reports that the handle cannot be closed yet (other strong owners
// still share it), nothing is closed and true is returned. Otherwise a GEM_CLOSE ioctl is issued:
//  - on failure, the error description is stored in the execution environment, optionally printed
//    to stderr, and false is returned;
//  - on success, the stored handle is reset to -1 and true is returned.
bool BufferObject::close() {
    if (!this->handle.canCloseBoHandle()) {
        PRINT_DEBUG_STRING(debugManager.flags.PrintBOCreateDestroyResult.get(), stdout, "Skipped closing BO-%d - more shared users!\n", this->handle.getBoHandle());
        return true;
    }

    GemClose close{};
    close.handle = this->handle.getBoHandle();
    close.userptr = this->userptr;

    PRINT_DEBUG_STRING(debugManager.flags.PrintBOCreateDestroyResult.get(), stdout, "Calling gem close on handle: BO-%d\n", this->handle.getBoHandle());

    auto ioctlHelper = this->drm->getIoctlHelper();
    int ret = ioctlHelper->ioctl(DrmIoctl::gemClose, &close);
    if (ret != 0) {
        // Capture errno right away, before any further call can overwrite it.
        int err = errno;

        CREATE_DEBUG_STRING(str, "ioctl(GEM_CLOSE) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
        drm->getRootDeviceEnvironment().executionEnvironment.setErrorDescription(std::string(str.get()));
        // The message is already fully formatted; print it as an argument of "%s" so that
        // it is never interpreted as a format string itself.
        PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "%s", str.get());
        DEBUG_BREAK_IF(true);
        return false;
    }

    this->handle.setBoHandle(-1);

    return true;
}
// Waits for this buffer object through Drm::waitHandle.
// Returns 0 immediately when VM bind is available; otherwise waits on the BO handle and
// treats any non-zero result as unrecoverable.
// NOTE(review): timeoutNs is not used by the body - waitHandle is always called with -1. Confirm this is intended.
int BufferObject::wait(int64_t timeoutNs) {
    if (!this->drm->isVmBindAvailable()) {
        const int waitResult = this->drm->waitHandle(this->handle.getBoHandle(), -1);
        UNRECOVERABLE_IF(waitResult != 0);
        return waitResult;
    }
    return 0;
}
bool BufferObject::setTiling(uint32_t mode, uint32_t stride) {
if (this->tilingMode == mode) {
return true;
}
GemSetTiling setTiling{};
setTiling.handle = this->handle.getBoHandle();
setTiling.tilingMode = mode;
setTiling.stride = stride;
auto ioctlHelper = this->drm->getIoctlHelper();
if (!ioctlHelper->setGemTiling(&setTiling)) {
return false;
}
this->tilingMode = setTiling.tilingMode;
return setTiling.tilingMode == mode;
}
// Returns the index into bindInfo to use for the given OS context:
// the context id when per-context VMs are in use, otherwise 0.
uint32_t BufferObject::getOsContextId(OsContext *osContext) {
    if (!perContextVmsUsed) {
        return 0u;
    }
    return osContext->getContextId();
}
// Fills the given exec object for this buffer object through the ioctl helper,
// passing the BO handle, GPU address, DRM context id, the current bind state for
// (OS context, vmHandleId) and the capture flag.
void BufferObject::fillExecObject(ExecObject &execObject, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId) {
    // bindInfo is sized in the constructor from the cached perContextVmsUsed flag, and bind()/unbind()
    // index it via getOsContextId(). Use the same helper here so the lookup always matches the
    // actual layout of bindInfo instead of re-querying the Drm object.
    const auto osContextId = this->getOsContextId(osContext);

    auto ioctlHelper = drm->getIoctlHelper();
    ioctlHelper->fillExecObject(execObject, this->handle.getBoHandle(), this->gpuAddress, drmContextId, this->bindInfo[osContextId][vmHandleId], this->isMarkedForCapture());
}
// Submits this buffer object as a command buffer together with the given residency list.
//
// used                 - number of bytes used in the command buffer; aligned up to 8 before submission
// startOffset          - start offset passed to the exec buffer
// flags                - flags passed to the exec buffer
// requiresCoherency    - not referenced in this function body
// osContext/vmHandleId/drmContextId - forwarded when filling the exec objects and the exec buffer
// residency            - array of residencyCount buffer objects to submit along with this one
// execObjectsStorage   - caller-provided storage; must hold at least residencyCount + 1 entries,
//                        because this buffer object is written at index residencyCount
// completionGpuAddress/completionValue - forwarded to the ioctl helper's execBuffer call
//
// Returns 0 on success, gpuHangDetected when eviction reports a GPU hang, otherwise the errno
// value obtained from the Drm object.
int BufferObject::exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId,
BufferObject *const residency[], size_t residencyCount, ExecObject *execObjectsStorage, uint64_t completionGpuAddress, TaskCountType completionValue) {
// Residency buffer objects occupy the first residencyCount slots.
for (size_t i = 0; i < residencyCount; i++) {
residency[i]->fillExecObject(execObjectsStorage[i], osContext, vmHandleId, drmContextId);
}
// This buffer object (the command buffer) goes into the last slot.
this->fillExecObject(execObjectsStorage[residencyCount], osContext, vmHandleId, drmContextId);
auto ioctlHelper = drm->getIoctlHelper();
ExecBuffer execbuf{};
ioctlHelper->fillExecBuffer(execbuf, reinterpret_cast<uintptr_t>(execObjectsStorage),
static_cast<uint32_t>(residencyCount + 1u), static_cast<uint32_t>(startOffset),
alignUp(used, 8), flags, drmContextId);
// Optional debug dump of the VM id and the whole execution buffer.
if (debugManager.flags.PrintExecutionBuffer.get()) {
PRINT_DEBUG_STRING(debugManager.flags.PrintExecutionBuffer.get(), stdout, "Exec called with drmVmId = %u\n",
static_cast<const OsContextLinux *>(osContext)->getDrmVmIds().size() ? static_cast<const OsContextLinux *>(osContext)->getDrmVmIds()[vmHandleId] : 0);
printExecutionBuffer(execbuf, residencyCount, execObjectsStorage, residency);
}
// First attempt.
int ret = ioctlHelper->execBuffer(&execbuf, completionGpuAddress, completionValue);
if (ret != 0) {
int err = this->drm->getErrno();
// EOPNOTSUPP is reported to the caller immediately, without any retry.
if (err == EOPNOTSUPP) {
PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(I915_GEM_EXECBUFFER2) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
return err;
}
// Second attempt: evict unused allocations without waiting for completion, then retry.
evictUnusedAllocations(false, true);
ret = ioctlHelper->execBuffer(&execbuf, completionGpuAddress, completionValue);
}
if (ret != 0) {
// Third attempt: evict again, this time waiting for completion; abort if a GPU hang is reported.
const auto status = evictUnusedAllocations(true, true);
if (status == MemoryOperationsStatus::gpuHangDetectedDuringOperation) {
PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "Error! GPU hang detected in BufferObject::exec(). Returning %d\n", gpuHangDetected);
return gpuHangDetected;
}
ret = ioctlHelper->execBuffer(&execbuf, completionGpuAddress, completionValue);
}
if (ret == 0) {
return 0;
}
// All attempts failed: report the errno value to the caller.
int err = this->drm->getErrno();
PRINT_DEBUG_STRING(debugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(I915_GEM_EXECBUFFER2) failed with %d. errno=%d(%s)\n", ret, err, strerror(err));
return err;
}
// Forwards the eviction request to the memory operations interface of the root device
// environment, which is accessed as a DrmMemoryOperationsHandler.
MemoryOperationsStatus BufferObject::evictUnusedAllocations(bool waitForCompletion, bool isLockNeeded) {
    auto memoryOperations = this->drm->getRootDeviceEnvironment().memoryOperationsInterface.get();
    auto drmOperationsHandler = static_cast<DrmMemoryOperationsHandler *>(memoryOperations);
    return drmOperationsHandler->evictUnusedAllocations(waitForCompletion, isLockNeeded);
}
// Prints the outcome of a bind (bind == true) or unbind (bind == false) operation of this
// buffer object, gated by the PrintBOBindingResult debug flag.
// The message contains the BO handle, vmHandleId, the DRM VM id, the GPU address range,
// the size and retVal. The DRM VM id is taken from the OsContextLinux VM id list at index
// vmHandleId, or printed as 0 when that list is empty.
// Successful results (retVal == 0) go to stdout; failures go to stderr and additionally
// include the errno value obtained from the Drm object and its strerror text.
void BufferObject::printBOBindingResult(OsContext *osContext, uint32_t vmHandleId, bool bind, int retVal) {
if (retVal == 0) {
if (bind) {
PRINT_DEBUG_STRING(debugManager.flags.PrintBOBindingResult.get(), stdout, "bind BO-%d to VM %u, drmVmId = %u, range: %llx - %llx, size: %lld, result: %d\n",
this->handle.getBoHandle(), vmHandleId, static_cast<const OsContextLinux *>(osContext)->getDrmVmIds().size() ? static_cast<const OsContextLinux *>(osContext)->getDrmVmIds()[vmHandleId] : 0, this->gpuAddress, ptrOffset(this->gpuAddress, this->size), this->size, retVal);
} else {
PRINT_DEBUG_STRING(debugManager.flags.PrintBOBindingResult.get(), stdout, "unbind BO-%d from VM %u, drmVmId = %u, range: %llx - %llx, size: %lld, result: %d\n",
this->handle.getBoHandle(), vmHandleId, static_cast<const OsContextLinux *>(osContext)->getDrmVmIds().size() ? static_cast<const OsContextLinux *>(osContext)->getDrmVmIds()[vmHandleId] : 0, this->gpuAddress, ptrOffset(this->gpuAddress, this->size), this->size, retVal);
}
} else {
// Read the error code once, before building the failure message.
auto err = this->drm->getErrno();
if (bind) {
PRINT_DEBUG_STRING(debugManager.flags.PrintBOBindingResult.get(), stderr, "bind BO-%d to VM %u, drmVmId = %u, range: %llx - %llx, size: %lld, result: %d, errno: %d(%s)\n",
this->handle.getBoHandle(), vmHandleId, static_cast<const OsContextLinux *>(osContext)->getDrmVmIds().size() ? static_cast<const OsContextLinux *>(osContext)->getDrmVmIds()[vmHandleId] : 0, this->gpuAddress, ptrOffset(this->gpuAddress, this->size), this->size, retVal, err, strerror(err));
} else {
PRINT_DEBUG_STRING(debugManager.flags.PrintBOBindingResult.get(), stderr, "unbind BO-%d from VM %u, drmVmId = %u, range: %llx - %llx, size: %lld, result: %d, errno: %d(%s)\n",
this->handle.getBoHandle(), vmHandleId, static_cast<const OsContextLinux *>(osContext)->getDrmVmIds().size() ? static_cast<const OsContextLinux *>(osContext)->getDrmVmIds()[vmHandleId] : 0, this->gpuAddress, ptrOffset(this->gpuAddress, this->size), this->size, retVal, err, strerror(err));
}
}
}
// Binds this buffer object to the VM identified by (osContext, vmHandleId).
// Does nothing and returns 0 when the bind is already recorded in bindInfo.
// Otherwise asks the Drm object to bind, optionally prints the result, and records the bind
// only when the Drm call returned 0. Returns the Drm call's result.
int BufferObject::bind(OsContext *osContext, uint32_t vmHandleId, const bool forcePagingFence) {
    const auto contextIndex = getOsContextId(osContext);
    if (this->bindInfo[contextIndex][vmHandleId]) {
        return 0;
    }

    const int bindResult = this->drm->bindBufferObject(osContext, vmHandleId, this, forcePagingFence);
    if (debugManager.flags.PrintBOBindingResult.get()) {
        printBOBindingResult(osContext, vmHandleId, true, bindResult);
    }

    if (bindResult == 0) {
        this->bindInfo[contextIndex][vmHandleId] = true;
    }
    return bindResult;
}
// Unbinds this buffer object from the VM identified by (osContext, vmHandleId).
// Does nothing and returns 0 when no bind is recorded in bindInfo.
// Otherwise asks the Drm object to unbind, optionally prints the result, and clears the
// recorded bind only when the Drm call returned 0. Returns the Drm call's result.
int BufferObject::unbind(OsContext *osContext, uint32_t vmHandleId) {
    const auto contextIndex = getOsContextId(osContext);
    if (!this->bindInfo[contextIndex][vmHandleId]) {
        return 0;
    }

    const int unbindResult = this->drm->unbindBufferObject(osContext, vmHandleId, this);
    if (debugManager.flags.PrintBOBindingResult.get()) {
        printBOBindingResult(osContext, vmHandleId, false, unbindResult);
    }

    if (unbindResult == 0) {
        this->bindInfo[contextIndex][vmHandleId] = false;
    }
    return unbindResult;
}
// Dumps the exec buffer and all exec objects to standard output in one printf call.
// The residency objects are logged first; the entry at index residencyCount
// (this buffer object) is logged last, prefixed with "Command ".
void BufferObject::printExecutionBuffer(ExecBuffer &execbuf, const size_t &residencyCount, ExecObject *execObjectsStorage, BufferObject *const residency[]) {
    auto helper = drm->getIoctlHelper();
    std::stringstream output;

    helper->logExecBuffer(execbuf, output);

    for (size_t index = 0; index < residencyCount; ++index) {
        helper->logExecObject(execObjectsStorage[index], output, residency[index]->peekSize());
    }

    output << "Command ";
    helper->logExecObject(execObjectsStorage[residencyCount], output, this->peekSize());

    printf("%s\n", output.str().c_str());
}
// Binds every buffer object in boToPin to each VM whose bit is set in the device bitfield
// of the given OS context. The individual bind results are OR-ed together, so the return
// value is 0 only when every bind returned 0.
// NOTE(review): the vmHandleId parameter is not used by the body - the bit index is used instead.
int bindBOsWithinContext(BufferObject *const boToPin[], size_t numberOfBos, OsContext *osContext, uint32_t vmHandleId, const bool forcePagingFence) {
    int combinedResult = 0;

    for (auto vmIndex = 0u; vmIndex < osContext->getDeviceBitfield().size(); ++vmIndex) {
        if (!osContext->getDeviceBitfield().test(vmIndex)) {
            continue;
        }
        for (size_t boIndex = 0; boIndex < numberOfBos; ++boIndex) {
            combinedResult |= boToPin[boIndex]->bind(osContext, vmIndex, forcePagingFence);
        }
    }

    return combinedResult;
}
// Pins the given buffer objects.
// With VM bind available, the buffer objects are bound within the context while holding the
// lock returned by the memory operations handler's lockHandlerIfUsed().
// Without VM bind, this buffer object is submitted through exec() with the buffer objects as
// the residency list. Returns the result of whichever path was taken.
int BufferObject::pin(BufferObject *const boToPin[], size_t numberOfBos, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId) {
    if (!this->drm->isVmBindAvailable()) {
        // One extra slot is needed for this buffer object itself.
        StackVec<ExecObject, maxFragmentsCount + 1> execObjects(numberOfBos + 1);
        return this->exec(4u, 0u, 0u, false, osContext, vmHandleId, drmContextId, boToPin, numberOfBos, &execObjects[0], 0, 0);
    }

    auto memoryOperations = this->drm->getRootDeviceEnvironment().memoryOperationsInterface.get();
    auto handlerLock = static_cast<DrmMemoryOperationsHandler *>(memoryOperations)->lockHandlerIfUsed();
    return bindBOsWithinContext(boToPin, numberOfBos, osContext, vmHandleId, false);
}
// Validates host pointer buffer objects by making them resident.
// With VM bind available, each buffer object is bound to vmHandleId in order while holding the
// lock returned by lockHandlerIfUsed(); the first non-zero bind result stops the loop and is returned.
// Without VM bind, direct submission is stopped on every registered engine whose OS context reports
// direct submission light as active (each under that engine's CSR ownership lock, which is held
// until this function returns), and this buffer object is then submitted through exec().
int BufferObject::validateHostPtr(BufferObject *const boToPin[], size_t numberOfBos, OsContext *osContext, uint32_t vmHandleId, uint32_t drmContextId) {
    if (this->drm->isVmBindAvailable()) {
        auto handlerLock = static_cast<DrmMemoryOperationsHandler *>(this->drm->getRootDeviceEnvironment().memoryOperationsInterface.get())->lockHandlerIfUsed();

        int bindResult = 0;
        for (size_t boIndex = 0; boIndex < numberOfBos && bindResult == 0; ++boIndex) {
            bindResult = boToPin[boIndex]->bind(osContext, vmHandleId, false);
        }
        return bindResult;
    }

    // The CSR locks must outlive the exec() call below, so they are collected at function scope.
    StackVec<std::unique_lock<NEO::CommandStreamReceiver::MutexType>, 1> csrLocks{};

    if (this->drm->getRootDeviceEnvironment().executionEnvironment.memoryManager.get()) {
        const auto &registeredEngines = this->drm->getRootDeviceEnvironment().executionEnvironment.memoryManager->getRegisteredEngines(osContext->getRootDeviceIndex());
        for (const auto &engine : registeredEngines) {
            if (!engine.osContext->isDirectSubmissionLightActive()) {
                continue;
            }
            csrLocks.push_back(engine.commandStreamReceiver->obtainUniqueOwnership());
            engine.commandStreamReceiver->stopDirectSubmission(false, false);
        }
    }

    // One extra slot is needed for this buffer object itself.
    StackVec<ExecObject, maxFragmentsCount + 1> execObjects(numberOfBos + 1);
    return this->exec(4u, 0u, 0u, false, osContext, vmHandleId, drmContextId, boToPin, numberOfBos, &execObjects[0], 0, 0);
}
void BufferObject::addBindExtHandle(uint32_t handle) {
bindExtHandles.push_back(handle);
}
} // namespace NEO