From f217c9198c078547abbc4745ae2d0a6e8621d88e Mon Sep 17 00:00:00 2001 From: Slawomir Milczarek Date: Mon, 26 Feb 2018 23:23:43 +0100 Subject: [PATCH] Add support for resource lock / unlock on Linux OS This commit fixes the issue with image contents writes in the configuration of CSR HW with AUB dump. Change-Id: Id0c4f36d4f9eee5175267384d42cb75bf41062f3 --- .../os_interface/linux/drm_buffer_object.cpp | 5 +- .../os_interface/linux/drm_buffer_object.h | 17 +- .../os_interface/linux/drm_memory_manager.cpp | 77 ++++++++- .../os_interface/linux/drm_memory_manager.h | 5 +- .../linux/device_command_stream_fixture.h | 29 +++- .../linux/drm_memory_manager_tests.cpp | 159 +++++++++++++++++- 6 files changed, 272 insertions(+), 20 deletions(-) diff --git a/runtime/os_interface/linux/drm_buffer_object.cpp b/runtime/os_interface/linux/drm_buffer_object.cpp index 329a7f3193..4f0aa03eee 100644 --- a/runtime/os_interface/linux/drm_buffer_object.cpp +++ b/runtime/os_interface/linux/drm_buffer_object.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -53,6 +53,7 @@ BufferObject::BufferObject(Drm *drm, int handle, bool isAllocated) : drm(drm), r this->size = 0; this->address = nullptr; + this->lockedAddress = nullptr; this->offset64 = 0; } @@ -126,7 +127,7 @@ bool BufferObject::setTiling(uint32_t mode, uint32_t stride) { this->stride = set_tiling.stride; return set_tiling.tiling_mode == mode; -}; +} void BufferObject::fillExecObject(drm_i915_gem_exec_object2 &execObject) { execObject.handle = this->handle; diff --git a/runtime/os_interface/linux/drm_buffer_object.h b/runtime/os_interface/linux/drm_buffer_object.h index 1945c6e9d0..e0c1645ef5 100644 --- a/runtime/os_interface/linux/drm_buffer_object.h +++ b/runtime/os_interface/linux/drm_buffer_object.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -69,13 +69,15 @@ class BufferObject { } uint32_t getRefCount() const; - bool peekIsAllocated() { return isAllocated; } - size_t peekSize() { return size; } - int peekHandle() { return handle; } - void *peekAddress() { return address; } + bool peekIsAllocated() const { return isAllocated; } + size_t peekSize() const { return size; } + int peekHandle() const { return handle; } + void *peekAddress() const { return address; } void setAddress(void *address) { this->address = address; } + void *peekLockedAddress() const { return lockedAddress; } + void setLockedAddress(void *cpuAddress) { this->lockedAddress = cpuAddress; } void setUnmapSize(uint64_t unmapSize) { this->unmapSize = unmapSize; } - uint64_t peekUnmapSize() { return unmapSize; } + uint64_t peekUnmapSize() const { return unmapSize; } void swapResidencyVector(ResidencyVector *residencyVect) { std::swap(this->residency, *residencyVect); } @@ -83,7 +85,7 @@ class BufferObject { execObjectsStorage = storage; } ResidencyVector *getResidency() { return &residency; } - StorageAllocatorType peekAllocationType() { return storageAllocatorType; } + StorageAllocatorType peekAllocationType() const { return storageAllocatorType; } void setAllocationType(StorageAllocatorType allocatorType) { this->storageAllocatorType = allocatorType; } protected: @@ -110,6 +112,7 @@ class BufferObject { uint64_t offset64; // last-seen GPU offset size_t size; void *address; // GPU side virtual address + void *lockedAddress; // CPU side virtual address bool isAllocated = false; uint64_t unmapSize = 0; diff --git a/runtime/os_interface/linux/drm_memory_manager.cpp b/runtime/os_interface/linux/drm_memory_manager.cpp index c6e67a1be1..f3af2e0933 100644 --- a/runtime/os_interface/linux/drm_memory_manager.cpp +++ b/runtime/os_interface/linux/drm_memory_manager.cpp @@ -245,7 +245,7 @@ GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryForImage(ImageInfo & bo->setUnmapSize(imgInfo.size); - auto allocation = new DrmAllocation(bo, gpuRange, imgInfo.size); + auto allocation = new DrmAllocation(bo, nullptr, (uint64_t)gpuRange, imgInfo.size); bo->setAllocationType(MMAP_ALLOCATOR); allocation->gmm = gmm; return allocation; @@ -444,7 +444,7 @@ void DrmMemoryManager::freeGraphicsMemoryImpl(GraphicsAllocation *gfxAllocation) search->wait(-1); unreference(search); -}; +} uint64_t DrmMemoryManager::getSystemSharedMemory() { uint64_t hostMemorySize = MemoryConstants::pageSize * (uint64_t)(sysconf(_SC_PHYS_PAGES)); @@ -501,4 +501,77 @@ void DrmMemoryManager::cleanOsHandles(OsHandleStorage &handleStorage) { BufferObject *DrmMemoryManager::getPinBB() const { return pinBB; } + +bool DrmMemoryManager::setDomainCpu(GraphicsAllocation &graphicsAllocation, bool writeEnable) { + DEBUG_BREAK_IF(writeEnable); //unsupported path (for CPU writes call SW_FINISH ioctl in unlockResource) + + auto bo = static_cast(&graphicsAllocation)->getBO(); + if (bo == nullptr) + return false; + + // move a buffer object to the CPU read, and possibly write domain, including waiting on flushes to occur + struct drm_i915_gem_set_domain set_domain; + memset(&set_domain, 0, sizeof(set_domain)); + set_domain.handle = bo->peekHandle(); + set_domain.read_domains = I915_GEM_DOMAIN_CPU; + set_domain.write_domain = writeEnable ? I915_GEM_DOMAIN_CPU : 0; + auto ret = drm->ioctl(DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + if (ret != 0) + return false; + + return true; +} + +void *DrmMemoryManager::lockResource(GraphicsAllocation *graphicsAllocation) { + if (graphicsAllocation == nullptr) + return nullptr; + + auto cpuPtr = graphicsAllocation->getUnderlyingBuffer(); + if (cpuPtr != nullptr) { + auto success = setDomainCpu(*graphicsAllocation, false); + DEBUG_BREAK_IF(!success); + (void)success; + return cpuPtr; + } + + auto bo = static_cast(graphicsAllocation)->getBO(); + if (bo == nullptr) + return nullptr; + + struct drm_i915_gem_mmap mmap_arg; + memset(&mmap_arg, 0, sizeof(mmap_arg)); + mmap_arg.handle = bo->peekHandle(); + mmap_arg.size = bo->peekSize(); + auto ret = drm->ioctl(DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg); + if (ret != 0) + return nullptr; + + bo->setLockedAddress(reinterpret_cast(mmap_arg.addr_ptr)); + + auto success = setDomainCpu(*graphicsAllocation, false); + DEBUG_BREAK_IF(!success); + (void)success; + + return bo->peekLockedAddress(); +} + +void DrmMemoryManager::unlockResource(GraphicsAllocation *graphicsAllocation) { + if (graphicsAllocation == nullptr) + return; + + auto cpuPtr = graphicsAllocation->getUnderlyingBuffer(); + if (cpuPtr != nullptr) { + return; + } + + auto bo = static_cast(graphicsAllocation)->getBO(); + if (bo == nullptr) + return; + + munmapFunction(bo->peekLockedAddress(), bo->peekSize()); + + bo->setLockedAddress(nullptr); +} } // namespace OCLRT diff --git a/runtime/os_interface/linux/drm_memory_manager.h b/runtime/os_interface/linux/drm_memory_manager.h index 4fe6704bd1..46328b8b0c 100644 --- a/runtime/os_interface/linux/drm_memory_manager.h +++ b/runtime/os_interface/linux/drm_memory_manager.h @@ -57,8 +57,8 @@ class DrmMemoryManager : public MemoryManager { GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, bool requireSpecificBitness, bool reuseBO) override; GraphicsAllocation *createPaddedAllocation(GraphicsAllocation *inputGraphicsAllocation, size_t sizeWithPadding) override; GraphicsAllocation *createGraphicsAllocationFromNTHandle(void *handle) override { return nullptr; } - void *lockResource(GraphicsAllocation *graphicsAllocation) override { return nullptr; }; - void unlockResource(GraphicsAllocation *graphicsAllocation) override{}; + void *lockResource(GraphicsAllocation *graphicsAllocation) override; + void unlockResource(GraphicsAllocation *graphicsAllocation) override; uint64_t getSystemSharedMemory() override; uint64_t getMaxApplicationAddress() override; @@ -80,6 +80,7 @@ class DrmMemoryManager : public MemoryManager { void eraseSharedBufferObject(BufferObject *bo); void pushSharedBufferObject(BufferObject *bo); BufferObject *allocUserptr(uintptr_t address, size_t size, uint64_t flags, bool softpin); + bool setDomainCpu(GraphicsAllocation &graphicsAllocation, bool writeEnable); Drm *drm; BufferObject *pinBB; diff --git a/unit_tests/os_interface/linux/device_command_stream_fixture.h b/unit_tests/os_interface/linux/device_command_stream_fixture.h index 533f937f5b..25011e4007 100644 --- a/unit_tests/os_interface/linux/device_command_stream_fixture.h +++ b/unit_tests/os_interface/linux/device_command_stream_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Intel Corporation + * Copyright (c) 2017 - 2018, Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -28,6 +28,7 @@ #include #include "drm/i915_drm.h" #include "runtime/helpers/aligned_memory.h" +#include "runtime/os_interface/linux/drm_memory_manager.h" #define RENDER_DEVICE_NAME_MATCHER ::testing::StrEq("/dev/dri/renderD128") @@ -100,6 +101,17 @@ class DrmMockCustom : public Drm { //DRM_IOCTL_I915_GEM_USERPTR __u32 returnHandle = 0; __u64 gpuMemSize = 3u * MemoryConstants::gigaByte; + //DRM_IOCTL_I915_GEM_MMAP + __u32 mmapHandle = 0; + __u32 mmapPad = 0; + __u64 mmapOffset = 0; + __u64 mmapSize = 0; + __u64 mmapAddrPtr = 0x7F4000001000; + __u64 mmapFlags = 0; + //DRM_IOCTL_I915_GEM_SET_DOMAIN + __u32 setDomainHandle = 0; + __u32 setDomainReadDomains = 0; + __u32 setDomainWriteDomain = 0; int ioctl(unsigned long request, void *arg) override { auto ext = ioctl_res_ext.load(); @@ -138,6 +150,21 @@ class DrmMockCustom : public Drm { aperture->aper_available_size = gpuMemSize; aperture->aper_size = gpuMemSize; } + if (request == DRM_IOCTL_I915_GEM_MMAP) { + auto mmapParams = (drm_i915_gem_mmap *)arg; + mmapHandle = mmapParams->handle; + mmapPad = mmapParams->pad; + mmapOffset = mmapParams->offset; + mmapSize = mmapParams->size; + mmapFlags = mmapParams->flags; + mmapParams->addr_ptr = mmapAddrPtr; + } + if (request == DRM_IOCTL_I915_GEM_SET_DOMAIN) { + auto setDomainParams = (drm_i915_gem_set_domain *)arg; + setDomainHandle = setDomainParams->handle; + setDomainReadDomains = setDomainParams->read_domains; + setDomainWriteDomain = setDomainParams->write_domain; + } if (ext->no != -1 && ext->no == ioctl_cnt.load()) { ioctl_cnt.fetch_add(1); diff --git a/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp b/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp index 522adc7c26..39b9bdb732 100644 --- a/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp +++ b/unit_tests/os_interface/linux/drm_memory_manager_tests.cpp @@ -78,6 +78,9 @@ int closeMock(int) { class TestedDrmMemoryManager : public DrmMemoryManager { public: + using DrmMemoryManager::allocUserptr; + using DrmMemoryManager::setDomainCpu; + TestedDrmMemoryManager(Drm *drm) : DrmMemoryManager(drm, gemCloseWorkerMode::gemCloseWorkerConsumingCommandBuffers, false) { this->lseekFunction = &lseekMock; this->mmapFunction = &mmapMock; @@ -103,10 +106,8 @@ class TestedDrmMemoryManager : public DrmMemoryManager { DrmMemoryManager::unreference(bo); } - BufferObject *allocUserptr(uintptr_t address, size_t size, uint64_t flags, bool softpin) { - return DrmMemoryManager::allocUserptr(address, size, flags, softpin); - } DrmGemCloseWorker *getgemCloseWorker() { return this->gemCloseWorker.get(); } + Allocator32bit *getDrmInternal32BitAllocator() { return internal32bitAllocator.get(); } }; @@ -1001,6 +1002,9 @@ TEST_F(DrmMemoryManagerTest, GivenMemoryManagerWhenAllocateGraphicsMemoryForImag queryGmm.release(); ASSERT_NE(nullptr, imageGraphicsAllocation); + EXPECT_NE(0u, imageGraphicsAllocation->getGpuAddress()); + EXPECT_EQ(nullptr, imageGraphicsAllocation->getUnderlyingBuffer()); + EXPECT_TRUE(imageGraphicsAllocation->gmm->resourceParams.Usage == GMM_RESOURCE_USAGE_TYPE::GMM_RESOURCE_USAGE_OCL_IMAGE); @@ -1369,18 +1373,161 @@ TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenCreateAllocationFromNtHand EXPECT_EQ(nullptr, graphicsAllocation); } -TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockCalledThenDoNothing) { - mock->ioctl_expected = 3; +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledThenReturnPtr) { + mock->ioctl_expected = 4; auto allocation = memoryManager->allocateGraphicsMemory(1, 1); ASSERT_NE(nullptr, allocation); auto ptr = memoryManager->lockResource(allocation); - EXPECT_EQ(nullptr, ptr); + EXPECT_NE(nullptr, ptr); + memoryManager->unlockResource(allocation); + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithCpuPtrThenReturnCpuPtrAndSetCpuDomain) { + mock->ioctl_expected = 4; + auto allocation = memoryManager->allocateGraphicsMemory(1, 1); + ASSERT_NE(nullptr, allocation); + EXPECT_NE(nullptr, allocation->getUnderlyingBuffer()); + + auto ptr = memoryManager->lockResource(allocation); + EXPECT_EQ(allocation->getUnderlyingBuffer(), ptr); + + //check DRM_IOCTL_I915_GEM_SET_DOMAIN input params + auto drmAllocation = (DrmAllocation *)allocation; + EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->setDomainHandle); + EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains); + EXPECT_EQ(0u, mock->setDomainWriteDomain); + + memoryManager->unlockResource(allocation); + memoryManager->freeGraphicsMemory(allocation); +} + +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithoutCpuPtrThenReturnLockedPtrAndSetCpuDomain) { + mock->ioctl_expected = 6; + cl_image_desc imgDesc = {}; + imgDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + imgDesc.image_width = 512; + imgDesc.image_height = 512; + auto imgInfo = MockGmm::initImgInfo(imgDesc, 0, nullptr); + imgInfo.imgDesc = &imgDesc; + imgInfo.size = 4096u; + imgInfo.rowPitch = 512u; + + auto queryGmm = MockGmm::queryImgParams(imgInfo); + auto allocation = memoryManager->allocateGraphicsMemoryForImage(imgInfo, queryGmm.get()); + queryGmm.release(); + ASSERT_NE(nullptr, allocation); + EXPECT_EQ(nullptr, allocation->getUnderlyingBuffer()); + + auto ptr = memoryManager->lockResource(allocation); + EXPECT_NE(nullptr, ptr); + + auto drmAllocation = (DrmAllocation *)allocation; + EXPECT_NE(nullptr, drmAllocation->getBO()->peekLockedAddress()); + + //check DRM_IOCTL_I915_GEM_MMAP input params + EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->mmapHandle); + EXPECT_EQ(0u, mock->mmapPad); + EXPECT_EQ(0u, mock->mmapOffset); + EXPECT_EQ(drmAllocation->getBO()->peekSize(), mock->mmapSize); + EXPECT_EQ(0u, mock->mmapFlags); + + //check DRM_IOCTL_I915_GEM_SET_DOMAIN input params + EXPECT_EQ((uint32_t)drmAllocation->getBO()->peekHandle(), mock->setDomainHandle); + EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains); + EXPECT_EQ(0u, mock->setDomainWriteDomain); + + memoryManager->unlockResource(allocation); + EXPECT_EQ(nullptr, drmAllocation->getBO()->peekLockedAddress()); memoryManager->freeGraphicsMemory(allocation); } +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnNullAllocationThenReturnNullPtr) { + GraphicsAllocation *allocation = nullptr; + + auto ptr = memoryManager->lockResource(allocation); + EXPECT_EQ(nullptr, ptr); + + memoryManager->unlockResource(allocation); +} + +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledOnAllocationWithoutBufferObjectThenReturnNullPtr) { + DrmAllocation drmAllocation(nullptr, nullptr, 0u, 0u); + EXPECT_EQ(nullptr, drmAllocation.getBO()); + + auto ptr = memoryManager->lockResource(&drmAllocation); + EXPECT_EQ(nullptr, ptr); + + memoryManager->unlockResource(&drmAllocation); +} + +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenLockUnlockIsCalledButFailsOnIoctlMmapThenReturnNullPtr) { + mock->ioctl_expected = 1; + this->ioctlResExt = {0, -1}; + mock->ioctl_res_ext = &ioctlResExt; + + DrmMockCustom drmMock; + struct BufferObjectMock : public BufferObject { + BufferObjectMock(Drm *drm) : BufferObject(drm, 1, true) {} + }; + BufferObjectMock bo(&drmMock); + DrmAllocation drmAllocation(&bo, nullptr, 0u, 0u); + EXPECT_NE(nullptr, drmAllocation.getBO()); + + auto ptr = memoryManager->lockResource(&drmAllocation); + EXPECT_EQ(nullptr, ptr); + + memoryManager->unlockResource(&drmAllocation); +} + +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledOnAllocationWithoutBufferObjectThenReturnFalse) { + DrmAllocation drmAllocation(nullptr, nullptr, 0u, 0u); + EXPECT_EQ(nullptr, drmAllocation.getBO()); + + auto success = memoryManager->setDomainCpu(drmAllocation, false); + EXPECT_FALSE(success); +} + +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledButFailsOnIoctlSetDomainThenReturnFalse) { + mock->ioctl_expected = 1; + this->ioctlResExt = {0, -1}; + mock->ioctl_res_ext = &ioctlResExt; + + DrmMockCustom drmMock; + struct BufferObjectMock : public BufferObject { + BufferObjectMock(Drm *drm) : BufferObject(drm, 1, true) {} + }; + BufferObjectMock bo(&drmMock); + DrmAllocation drmAllocation(&bo, nullptr, 0u, 0u); + EXPECT_NE(nullptr, drmAllocation.getBO()); + + auto success = memoryManager->setDomainCpu(drmAllocation, false); + EXPECT_FALSE(success); +} + +TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerWhenSetDomainCpuIsCalledOnAllocationThenReturnSetWriteDomain) { + mock->ioctl_expected = 1; + + DrmMockCustom drmMock; + struct BufferObjectMock : public BufferObject { + BufferObjectMock(Drm *drm) : BufferObject(drm, 1, true) {} + }; + BufferObjectMock bo(&drmMock); + DrmAllocation drmAllocation(&bo, nullptr, 0u, 0u); + EXPECT_NE(nullptr, drmAllocation.getBO()); + + auto success = memoryManager->setDomainCpu(drmAllocation, true); + EXPECT_TRUE(success); + + //check DRM_IOCTL_I915_GEM_SET_DOMAIN input params + EXPECT_EQ((uint32_t)drmAllocation.getBO()->peekHandle(), mock->setDomainHandle); + EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainReadDomains); + EXPECT_EQ((uint32_t)I915_GEM_DOMAIN_CPU, mock->setDomainWriteDomain); +} + TEST_F(DrmMemoryManagerTest, givenDrmMemoryManagerAndUnifiedAuxCapableAllocationWhenMappingThenReturnFalse) { mock->ioctl_expected = 3; auto gmm = Gmm::create(nullptr, 123, false);