From 70ac7ec80ec689b4d80e1a036d911d962d0ae522 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Fri, 31 Jul 2020 07:45:48 +0200 Subject: [PATCH] Append proper flag to buffer object Related-To: NEO-4338 Change-Id: I87604992fdfc20cd02773999f7c019344e8e3213 Signed-off-by: Lukasz Jobczyk --- .../os_interface/linux/drm_command_stream.h | 2 +- .../os_interface/linux/drm_command_stream.inl | 3 ++- .../linux/drm_command_stream_bdw_plus.inl | 2 +- .../linux/drm_buffer_object_tests.cpp | 20 +++++++++---------- .../linux/drm_memory_manager_tests.cpp | 2 +- .../linux/drm_direct_submission.inl | 5 +++-- .../source/os_interface/linux/CMakeLists.txt | 1 + .../os_interface/linux/drm_buffer_object.cpp | 14 +++++++------ .../os_interface/linux/drm_buffer_object.h | 7 ++++--- .../linux/drm_buffer_object_extended.cpp | 14 +++++++++++++ .../os_interface/linux/drm_memory_manager.cpp | 8 ++++---- 11 files changed, 49 insertions(+), 29 deletions(-) create mode 100644 shared/source/os_interface/linux/drm_buffer_object_extended.cpp diff --git a/opencl/source/os_interface/linux/drm_command_stream.h b/opencl/source/os_interface/linux/drm_command_stream.h index 20bc2e4f8d..cda0c43b3b 100644 --- a/opencl/source/os_interface/linux/drm_command_stream.h +++ b/opencl/source/os_interface/linux/drm_command_stream.h @@ -51,7 +51,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver { protected: MOCKABLE_VIRTUAL void flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency); - MOCKABLE_VIRTUAL void exec(const BatchBuffer &batchBuffer, uint32_t drmContextId); + MOCKABLE_VIRTUAL void exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId); std::vector residency; std::vector execObjectsStorage; diff --git a/opencl/source/os_interface/linux/drm_command_stream.inl b/opencl/source/os_interface/linux/drm_command_stream.inl index ce3edb3ab2..99259ad98e 100644 --- a/opencl/source/os_interface/linux/drm_command_stream.inl +++ b/opencl/source/os_interface/linux/drm_command_stream.inl @@ -90,7 +90,7 @@ bool DrmCommandStreamReceiver::flush(BatchBuffer &batchBuffer, Reside } template -void DrmCommandStreamReceiver::exec(const BatchBuffer &batchBuffer, uint32_t drmContextId) { +void DrmCommandStreamReceiver::exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId) { DrmAllocation *alloc = static_cast(batchBuffer.commandBufferAllocation); DEBUG_BREAK_IF(!alloc); BufferObject *bb = alloc->getBO(); @@ -110,6 +110,7 @@ void DrmCommandStreamReceiver::exec(const BatchBuffer &batchBuffer, u int err = bb->exec(static_cast(alignUp(batchBuffer.usedSize - batchBuffer.startOffset, 8)), batchBuffer.startOffset, execFlags, batchBuffer.requiresCoherency, + vmHandleId, drmContextId, this->residency.data(), this->residency.size(), this->execObjectsStorage.data()); diff --git a/opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl b/opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl index 0ea1044a8b..84d090d209 100644 --- a/opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl +++ b/opencl/source/os_interface/linux/drm_command_stream_bdw_plus.inl @@ -14,7 +14,7 @@ namespace NEO { template void DrmCommandStreamReceiver::flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency) { this->processResidency(allocationsForResidency, 0u); - this->exec(batchBuffer, static_cast(osContext)->getDrmContextIds()[0]); + this->exec(batchBuffer, 0u, static_cast(osContext)->getDrmContextIds()[0]); } } // namespace NEO diff --git a/opencl/test/unit_test/os_interface/linux/drm_buffer_object_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_buffer_object_tests.cpp index 834496cb06..c282002d77 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_buffer_object_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_buffer_object_tests.cpp @@ -26,8 +26,8 @@ class TestedBufferObject : public BufferObject { this->tiling_mode = mode; } - void fillExecObject(drm_i915_gem_exec_object2 &execObject, uint32_t drmContextId) override { - BufferObject::fillExecObject(execObject, drmContextId); + void fillExecObject(drm_i915_gem_exec_object2 &execObject, uint32_t vmHandleId, uint32_t drmContextId) override { + BufferObject::fillExecObject(execObject, vmHandleId, drmContextId); execObjectPointerFilled = &execObject; } @@ -66,7 +66,7 @@ TEST_F(DrmBufferObjectTest, exec) { mock->ioctl_res = 0; drm_i915_gem_exec_object2 execObjectsStorage = {}; - auto ret = bo->exec(0, 0, 0, false, 1, nullptr, 0u, &execObjectsStorage); + auto ret = bo->exec(0, 0, 0, false, 0, 1, nullptr, 0u, &execObjectsStorage); EXPECT_EQ(mock->ioctl_res, ret); EXPECT_EQ(0u, mock->execBuffer.flags); } @@ -76,7 +76,7 @@ TEST_F(DrmBufferObjectTest, exec_ioctlFailed) { mock->ioctl_res = -1; mock->errnoValue = EFAULT; drm_i915_gem_exec_object2 execObjectsStorage = {}; - EXPECT_EQ(EFAULT, bo->exec(0, 0, 0, false, 1, nullptr, 0u, &execObjectsStorage)); + EXPECT_EQ(EFAULT, bo->exec(0, 0, 0, false, 0, 1, nullptr, 0u, &execObjectsStorage)); } TEST_F(DrmBufferObjectTest, setTiling_success) { @@ -105,7 +105,7 @@ TEST_F(DrmBufferObjectTest, givenAddressThatWhenSizeIsAddedCrosses32BitBoundaryW memset(&execObject, 0, sizeof(execObject)); bo->setAddress(((uint64_t)1u << 32) - 0x1000u); bo->setSize(0x1000); - bo->fillExecObject(execObject, 1); + bo->fillExecObject(execObject, 0, 1); //base address + size > size of 32bit address space EXPECT_TRUE(execObject.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS); } @@ -116,7 +116,7 @@ TEST_F(DrmBufferObjectTest, givenAddressThatWhenSizeIsAddedWithin32BitBoundaryWh memset(&execObject, 0, sizeof(execObject)); bo->setAddress(((uint64_t)1u << 32) - 0x1000u); bo->setSize(0xFFF); - bo->fillExecObject(execObject, 1); + bo->fillExecObject(execObject, 0, 1); //base address + size < size of 32bit address space EXPECT_TRUE(execObject.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS); } @@ -133,7 +133,7 @@ TEST_F(DrmBufferObjectTest, onPinIoctlFailed) { bo->setAddress(reinterpret_cast(buff.get())); BufferObject *boArray[1] = {boToPin.get()}; - auto ret = bo->pin(boArray, 1, 1); + auto ret = bo->pin(boArray, 1, 0, 1); EXPECT_EQ(EINVAL, ret); } @@ -144,7 +144,7 @@ TEST_F(DrmBufferObjectTest, whenPrintExecutionBufferIsSetToTrueThenMessageFoundI drm_i915_gem_exec_object2 execObjectsStorage = {}; testing::internal::CaptureStdout(); - auto ret = bo->exec(0, 0, 0, false, 1, nullptr, 0u, &execObjectsStorage); + auto ret = bo->exec(0, 0, 0, false, 0, 1, nullptr, 0u, &execObjectsStorage); EXPECT_EQ(0, ret); std::string output = testing::internal::GetCapturedStdout(); @@ -170,7 +170,7 @@ TEST(DrmBufferObjectSimpleTest, givenInvalidBoWhenPinIsCalledThenErrorIsReturned mock->errnoValue = EFAULT; BufferObject *boArray[1] = {boToPin.get()}; - auto ret = bo->pin(boArray, 1, 1); + auto ret = bo->pin(boArray, 1, 0, 1); EXPECT_EQ(EFAULT, ret); } @@ -200,7 +200,7 @@ TEST(DrmBufferObjectSimpleTest, givenArrayOfBosWhenPinnedThenAllBosArePinned) { BufferObject *array[3] = {boToPin.get(), boToPin2.get(), boToPin3.get()}; bo->setAddress(reinterpret_cast(buff.get())); - auto ret = bo->pin(array, 3, 1); + auto ret = bo->pin(array, 3, 0, 1); EXPECT_EQ(mock->ioctl_res, ret); EXPECT_LT(0u, mock->execBuffer.batch_len); diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 50e57dc6ef..40e7ef05de 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -2973,7 +2973,7 @@ TEST_F(DrmMemoryManagerWithExplicitExpectationsTest, givenDisabledForcePinAndEna PinBufferObject(Drm *drm) : BufferObject(drm, 1, 0) { } - int pin(BufferObject *const boToPin[], size_t numberOfBos, uint32_t drmContextId) override { + int pin(BufferObject *const boToPin[], size_t numberOfBos, uint32_t vmHandleId, uint32_t drmContextId) override { for (size_t i = 0; i < numberOfBos; i++) { pinnedBoArray[i] = boToPin[i]; } diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index edf41908a3..3b3bfbabe6 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -54,12 +54,13 @@ bool DrmDirectSubmission::submit(uint64_t gpuAddress, siz drm_i915_gem_exec_object2 execObject{}; bool ret = false; - for (const auto &drmContextId : drmContextIds) { + for (uint32_t drmIterator = 0u; drmIterator < drmContextIds.size(); drmIterator++) { ret |= bb->exec(static_cast(size), 0, execFlags, false, - drmContextId, + drmIterator, + drmContextIds[drmIterator], nullptr, 0, &execObject); diff --git a/shared/source/os_interface/linux/CMakeLists.txt b/shared/source/os_interface/linux/CMakeLists.txt index ff3ea6e208..e29cd36907 100644 --- a/shared/source/os_interface/linux/CMakeLists.txt +++ b/shared/source/os_interface/linux/CMakeLists.txt @@ -14,6 +14,7 @@ set(NEO_CORE_OS_INTERFACE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_allocation_extended.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_buffer_object.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_buffer_object.h + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_buffer_object_extended.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_gem_close_worker.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_gem_close_worker.h ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager.cpp diff --git a/shared/source/os_interface/linux/drm_buffer_object.cpp b/shared/source/os_interface/linux/drm_buffer_object.cpp index 718a0e5512..2e5c463e7e 100644 --- a/shared/source/os_interface/linux/drm_buffer_object.cpp +++ b/shared/source/os_interface/linux/drm_buffer_object.cpp @@ -90,7 +90,7 @@ bool BufferObject::setTiling(uint32_t mode, uint32_t stride) { return set_tiling.tiling_mode == mode; } -void BufferObject::fillExecObject(drm_i915_gem_exec_object2 &execObject, uint32_t drmContextId) { +void BufferObject::fillExecObject(drm_i915_gem_exec_object2 &execObject, uint32_t vmHandleId, uint32_t drmContextId) { execObject.handle = this->handle; execObject.relocation_count = 0; //No relocations, we are SoftPinning execObject.relocs_ptr = 0ul; @@ -99,13 +99,15 @@ void BufferObject::fillExecObject(drm_i915_gem_exec_object2 &execObject, uint32_ execObject.flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS; execObject.rsvd1 = drmContextId; execObject.rsvd2 = 0; + + this->fillExecObjectImpl(execObject, vmHandleId); } -int BufferObject::exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, uint32_t drmContextId, BufferObject *const residency[], size_t residencyCount, drm_i915_gem_exec_object2 *execObjectsStorage) { +int BufferObject::exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, uint32_t vmHandleId, uint32_t drmContextId, BufferObject *const residency[], size_t residencyCount, drm_i915_gem_exec_object2 *execObjectsStorage) { for (size_t i = 0; i < residencyCount; i++) { - residency[i]->fillExecObject(execObjectsStorage[i], drmContextId); + residency[i]->fillExecObject(execObjectsStorage[i], vmHandleId, drmContextId); } - this->fillExecObject(execObjectsStorage[residencyCount], drmContextId); + this->fillExecObject(execObjectsStorage[residencyCount], vmHandleId, drmContextId); drm_i915_gem_execbuffer2 execbuf{}; execbuf.buffers_ptr = reinterpret_cast(execObjectsStorage); @@ -176,9 +178,9 @@ void BufferObject::printExecutionBuffer(drm_i915_gem_execbuffer2 &execbuf, const std::cout << logger << std::endl; } -int BufferObject::pin(BufferObject *const boToPin[], size_t numberOfBos, uint32_t drmContextId) { +int BufferObject::pin(BufferObject *const boToPin[], size_t numberOfBos, uint32_t vmHandleId, uint32_t drmContextId) { StackVec execObject(numberOfBos + 1); - return this->exec(4u, 0u, 0u, false, drmContextId, boToPin, numberOfBos, &execObject[0]); + return this->exec(4u, 0u, 0u, false, vmHandleId, drmContextId, boToPin, numberOfBos, &execObject[0]); } } // namespace NEO diff --git a/shared/source/os_interface/linux/drm_buffer_object.h b/shared/source/os_interface/linux/drm_buffer_object.h index 239b216279..d8f0ab6561 100644 --- a/shared/source/os_interface/linux/drm_buffer_object.h +++ b/shared/source/os_interface/linux/drm_buffer_object.h @@ -39,9 +39,9 @@ class BufferObject { bool setTiling(uint32_t mode, uint32_t stride); - MOCKABLE_VIRTUAL int pin(BufferObject *const boToPin[], size_t numberOfBos, uint32_t drmContextId); + MOCKABLE_VIRTUAL int pin(BufferObject *const boToPin[], size_t numberOfBos, uint32_t vmHandleId, uint32_t drmContextId); - int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, uint32_t drmContextId, BufferObject *const residency[], size_t residencyCount, drm_i915_gem_exec_object2 *execObjectsStorage); + int exec(uint32_t used, size_t startOffset, unsigned int flags, bool requiresCoherency, uint32_t vmHandleId, uint32_t drmContextId, BufferObject *const residency[], size_t residencyCount, drm_i915_gem_exec_object2 *execObjectsStorage); void bind(uint32_t vmHandleId); void unbind(uint32_t vmHandleId); @@ -78,7 +78,8 @@ class BufferObject { //Tiling uint32_t tiling_mode; - MOCKABLE_VIRTUAL void fillExecObject(drm_i915_gem_exec_object2 &execObject, uint32_t drmContextId); + MOCKABLE_VIRTUAL void fillExecObject(drm_i915_gem_exec_object2 &execObject, uint32_t vmHandleId, uint32_t drmContextId); + void fillExecObjectImpl(drm_i915_gem_exec_object2 &execObject, uint32_t vmHandleId); uint64_t gpuAddress = 0llu; diff --git a/shared/source/os_interface/linux/drm_buffer_object_extended.cpp b/shared/source/os_interface/linux/drm_buffer_object_extended.cpp new file mode 100644 index 0000000000..9363ad2086 --- /dev/null +++ b/shared/source/os_interface/linux/drm_buffer_object_extended.cpp @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2017-2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/os_interface/linux/drm_buffer_object.h" + +namespace NEO { + +void BufferObject::fillExecObjectImpl(drm_i915_gem_exec_object2 &execObject, uint32_t vmHandleId) {} + +} // namespace NEO diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index dcdaa51042..9a824d0e18 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -181,7 +181,7 @@ NEO::BufferObject *DrmMemoryManager::allocUserptr(uintptr_t address, size_t size void DrmMemoryManager::emitPinningRequest(BufferObject *bo, const AllocationData &allocationData) const { if (forcePinEnabled && pinBBs.at(allocationData.rootDeviceIndex) != nullptr && allocationData.flags.forcePin && allocationData.size >= this->pinThreshold) { - pinBBs.at(allocationData.rootDeviceIndex)->pin(&bo, 1, getDefaultDrmContextId()); + pinBBs.at(allocationData.rootDeviceIndex)->pin(&bo, 1, 0, getDefaultDrmContextId()); } } @@ -276,7 +276,7 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryWithGpuVa(const Allo BufferObject *boPtr = bo.get(); if (forcePinEnabled && pinBBs.at(allocationData.rootDeviceIndex) != nullptr && alignedSize >= this->pinThreshold) { - pinBBs.at(allocationData.rootDeviceIndex)->pin(&boPtr, 1, osContextLinux->getContextId()); + pinBBs.at(allocationData.rootDeviceIndex)->pin(&boPtr, 1, 0, osContextLinux->getDrmContextIds()[0]); } auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, allocationData.type, bo.get(), res, bo->gpuAddress, alignedSize, MemoryPool::System4KBPages); @@ -310,7 +310,7 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryForNonSvmHostPtr(const Al if (validateHostPtrMemory) { auto boPtr = bo.get(); - int result = pinBBs.at(allocationData.rootDeviceIndex)->pin(&boPtr, 1, getDefaultDrmContextId()); + int result = pinBBs.at(allocationData.rootDeviceIndex)->pin(&boPtr, 1, 0, getDefaultDrmContextId()); if (result != SUCCESS) { unreference(bo.release(), true); releaseGpuRange(reinterpret_cast(gpuVirtualAddress), alignedSize, allocationData.rootDeviceIndex); @@ -666,7 +666,7 @@ MemoryManager::AllocationStatus DrmMemoryManager::populateOsHandles(OsHandleStor } if (validateHostPtrMemory) { - int result = pinBBs.at(rootDeviceIndex)->pin(allocatedBos, numberOfBosAllocated, getDefaultDrmContextId()); + int result = pinBBs.at(rootDeviceIndex)->pin(allocatedBos, numberOfBosAllocated, 0, getDefaultDrmContextId()); if (result == EFAULT) { for (uint32_t i = 0; i < numberOfBosAllocated; i++) {