// Patch overview (unified diff against shared/source/os_interface/linux and its unit tests).
// This header sits before the first "diff --git" line and is not part of any hunk.
//
// What the hunks below show:
//  - drm_allocation.cpp / drm_allocation.h: adds DrmAllocation::prefetchBOWithChunking(Drm *).
//    It collects the sub-device ids set in storageInfo.subDeviceBitfield, computes
//    chunkLength = bo->peekSize() / storageInfo.numOfChunks, and for every chunk calls
//    ioctlHelper->setVmPrefetch(chunkStart, chunkLength, region, vmId) once per collected
//    sub-device VM. The region is built as (memoryClassDevice << 16) | subDeviceId, where
//    subDeviceId is picked with subDeviceIds[i / chunksPerSubDevice].
//    setMemPrefetch() now calls it in its single-BO branch when bo->isChunked is set.
//  - drm_memory_manager.h: the getDrm() declaration is moved from the later section of the
//    class to just above the `protected:` label.
//  - drm_memory_operations_handler_bind.cpp: makeResidentWithinOsContext() calls
//    prefetchBOWithChunking() for chunked BOs.
//  - drm_neo.cpp: changeBufferObjectBinding() no longer masks bindMakeResident with
//    !bo->isChunked.
//  - ioctl_helper_prelim.cpp: the PrintBOChunkingLogs message is emitted after the
//    GemCreateExt ioctl and now also prints createExt.handle ("BO-%d").
//  - tests: MockBufferObject::setSize() is added; new tests cover the chunked prefetch paths;
//    the vm-bind test now expects the make-resident flag; a copyright year is bumped.
//
// NOTE(review), to be confirmed against the full sources:
//  - prefetchBOWithChunking() divides by subDeviceIds.size(), storageInfo.numOfChunks and
//    chunksPerSubDevice with no zero check visible in this patch. An empty subDeviceBitfield,
//    numOfChunks == 0, or numOfChunks smaller than the number of set sub-devices would divide
//    by zero. When numOfChunks is not a multiple of subDeviceIds.size(),
//    subDeviceIds[i / chunksPerSubDevice] looks like it can index past the last element.
//    TODO confirm callers guarantee these preconditions.
//  - The function is declared to return int but returns the bool `success`. The second debug
//    print's "result: %d" logs the accumulated flag, not the outcome of the individual
//    setVmPrefetch() call.
//  - In setMemPrefetch(), `auto drm = bo->peekDrm();` shadows the `drm` parameter and the
//    result is passed through const_cast.
//  - makeResidentWithinOsContext() does not check the value returned by
//    prefetchBOWithChunking().
//  - Template argument lists (e.g. after static_cast, std::vector, std::make_unique) and the
//    original line breaks appear to be missing from this text, so the patch presumably will
//    not apply or compile exactly as shown.
diff --git a/shared/source/os_interface/linux/drm_allocation.cpp b/shared/source/os_interface/linux/drm_allocation.cpp index 923f463aa1..209bd946da 100644 --- a/shared/source/os_interface/linux/drm_allocation.cpp +++ b/shared/source/os_interface/linux/drm_allocation.cpp @@ -182,6 +182,48 @@ bool DrmAllocation::setCacheAdvice(Drm *drm, size_t regionSize, CacheRegion regi return true; } +int DrmAllocation::prefetchBOWithChunking(Drm *drm) { + auto getSubDeviceIds = [](const DeviceBitfield &subDeviceBitfield) { + SubDeviceIdsVec subDeviceIds; + for (auto subDeviceId = 0u; subDeviceId < subDeviceBitfield.size(); subDeviceId++) { + if (subDeviceBitfield.test(subDeviceId)) { + subDeviceIds.push_back(subDeviceId); + } + } + return subDeviceIds; + }; + + auto bo = this->getBO(); + + auto ioctlHelper = drm->getIoctlHelper(); + auto memoryClassDevice = ioctlHelper->getDrmParamValue(DrmParam::MemoryClassDevice); + auto subDeviceIds = getSubDeviceIds(storageInfo.subDeviceBitfield); + + uint32_t chunksPerSubDevice = this->storageInfo.numOfChunks / subDeviceIds.size(); + uint64_t chunkLength = (bo->peekSize() / this->storageInfo.numOfChunks); + bool success = true; + for (uint32_t i = 0; i < this->storageInfo.numOfChunks; i++) { + uint64_t chunkStart = bo->peekAddress() + i * chunkLength; + auto subDeviceId = subDeviceIds[i / chunksPerSubDevice]; + for (auto vmHandleId : subDeviceIds) { + auto region = static_cast((memoryClassDevice << 16u) | subDeviceId); + auto vmId = drm->getVirtualMemoryAddressSpace(vmHandleId); + + PRINT_DEBUG_STRING(DebugManager.flags.PrintBOPrefetchingResult.get(), stdout, + "prefetching BO=%d to VM %u, drmVmId=%u, range: %llx - %llx, size: %lld, region: %x\n", + bo->peekHandle(), vmId, vmHandleId, chunkStart, ptrOffset(chunkStart, chunkLength), chunkLength, region); + + success &= ioctlHelper->setVmPrefetch(chunkStart, chunkLength, region, vmId); + + PRINT_DEBUG_STRING(DebugManager.flags.PrintBOPrefetchingResult.get(), stdout, + "prefetched BO=%d to VM 
%u, drmVmId=%u, range: %llx - %llx, size: %lld, region: %x, result: %d\n", + bo->peekHandle(), vmId, vmHandleId, chunkStart, ptrOffset(chunkStart, chunkLength), chunkLength, region, success); + } + } + + return success; +} + int DrmAllocation::makeBOsResident(OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind) { if (this->fragmentsStorage.fragmentCount) { for (unsigned int f = 0; f < this->fragmentsStorage.fragmentCount; f++) { @@ -434,7 +476,12 @@ bool DrmAllocation::setMemPrefetch(Drm *drm, SubDeviceIdsVec &subDeviceIds) { } } else { auto bo = this->getBO(); - success = prefetchBO(bo, subDeviceIds[0], subDeviceIds[0]); + if (bo->isChunked) { + auto drm = bo->peekDrm(); + success = prefetchBOWithChunking(const_cast(drm)); + } else { + success = prefetchBO(bo, subDeviceIds[0], subDeviceIds[0]); + } } return success; diff --git a/shared/source/os_interface/linux/drm_allocation.h b/shared/source/os_interface/linux/drm_allocation.h index 1fc7a7b959..f412cd7eb0 100644 --- a/shared/source/os_interface/linux/drm_allocation.h +++ b/shared/source/os_interface/linux/drm_allocation.h @@ -124,6 +124,7 @@ class DrmAllocation : public GraphicsAllocation { this->osContext = context; } + int prefetchBOWithChunking(Drm *drm); MOCKABLE_VIRTUAL int makeBOsResident(OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind); MOCKABLE_VIRTUAL int bindBO(BufferObject *bo, OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind); MOCKABLE_VIRTUAL int bindBOs(OsContext *osContext, uint32_t vmHandleId, std::vector *bufferObjects, bool bind); diff --git a/shared/source/os_interface/linux/drm_memory_manager.h b/shared/source/os_interface/linux/drm_memory_manager.h index d957b38b39..22f33d8daf 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.h +++ b/shared/source/os_interface/linux/drm_memory_manager.h @@ -91,6 +91,7 @@ class DrmMemoryManager : public MemoryManager { void 
releaseDeviceSpecificMemResources(uint32_t rootDeviceIndex) override; void createDeviceSpecificMemResources(uint32_t rootDeviceIndex) override; bool allowIndirectAllocationsAsPack(uint32_t rootDeviceIndex) override; + Drm &getDrm(uint32_t rootDeviceIndex) const; protected: void registerSharedBoHandleAllocation(DrmAllocation *drmAllocation); @@ -146,7 +147,6 @@ class DrmMemoryManager : public MemoryManager { inline std::unique_ptr makeGmmIfSingleHandle(const AllocationData &allocationData, size_t sizeAligned); inline std::unique_ptr makeDrmAllocation(const AllocationData &allocationData, std::unique_ptr gmm, uint64_t gpuAddress, size_t sizeAligned); - Drm &getDrm(uint32_t rootDeviceIndex) const; uint32_t getRootDeviceIndex(const Drm *drm); BufferObject *createRootDeviceBufferObject(uint32_t rootDeviceIndex); void releaseBufferObject(uint32_t rootDeviceIndex); diff --git a/shared/source/os_interface/linux/drm_memory_operations_handler_bind.cpp b/shared/source/os_interface/linux/drm_memory_operations_handler_bind.cpp index 562c5a50d5..73a78ceca8 100644 --- a/shared/source/os_interface/linux/drm_memory_operations_handler_bind.cpp +++ b/shared/source/os_interface/linux/drm_memory_operations_handler_bind.cpp @@ -65,6 +65,12 @@ MemoryOperationsStatus DrmMemoryOperationsHandlerBind::makeResidentWithinOsConte } } + if (bo->isChunked) { + auto memoryManager = static_cast(this->rootDeviceEnvironment.executionEnvironment.memoryManager.get()); + auto drm = &memoryManager->getDrm(drmAllocation->getRootDeviceIndex()); + drmAllocation->prefetchBOWithChunking(drm); + } + if (!evictable) { drmAllocation->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, osContext->getContextId()); } diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index 362a8e7926..ce3446f82f 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -1249,7 +1249,7 @@ int 
changeBufferObjectBinding(Drm *drm, OsContext *osContext, uint32_t vmHandleI bool bindImmediate = bo->isImmediateBindingRequired(); bool bindMakeResident = false; if (drm->useVMBindImmediate()) { - bindMakeResident = bo->isExplicitResidencyRequired() && !bo->isChunked; + bindMakeResident = bo->isExplicitResidencyRequired(); bindImmediate = true; } flags |= ioctlHelper->getFlagsForVmBind(bindCapture, bindImmediate, bindMakeResident); diff --git a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp index 6f4519355f..14cced9326 100644 --- a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp +++ b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp @@ -67,7 +67,6 @@ bool IoctlHelperPrelim20::isChunkingAvailable() { if (retVal) { return false; } - return chunkSupported; } @@ -177,17 +176,13 @@ int IoctlHelperPrelim20::createGemExt(const MemRegionsVec &memClassInstances, si pairSetparamRegion.param.data = pairHandle; } + size_t chunkingSize = 0u; if (isChunked) { - size_t chunkingSize = allocSize / numOfChunks; + chunkingSize = allocSize / numOfChunks; chunkingParamRegion.base.name = PRELIM_I915_GEM_CREATE_EXT_SETPARAM; chunkingParamRegion.param.param = PRELIM_I915_OBJECT_PARAM | PRELIM_I915_PARAM_SET_CHUNK_SIZE; UNRECOVERABLE_IF(chunkingSize & (MemoryConstants::pageSize64k - 1)); chunkingParamRegion.param.data = chunkingSize; - printDebugString(DebugManager.flags.PrintBOChunkingLogs.get(), stdout, - "GEM_CREATE_EXT with BOChunkingSize %d, chunkingParamRegion.param.data %d, numOfChunks %d\n", - chunkingSize, - chunkingParamRegion.param.data, - numOfChunks); setparamRegion.base.next_extension = reinterpret_cast(&chunkingParamRegion); } else { if (vmId != std::nullopt) { @@ -216,6 +211,15 @@ int IoctlHelperPrelim20::createGemExt(const MemRegionsVec &memClassInstances, si auto ret = IoctlHelper::ioctl(DrmIoctl::GemCreateExt, &createExt); + if (isChunked) { + 
printDebugString(DebugManager.flags.PrintBOChunkingLogs.get(), stdout, + "GEM_CREATE_EXT BO-%d with BOChunkingSize %d, chunkingParamRegion.param.data %d, numOfChunks %d\n", + createExt.handle, + chunkingSize, + chunkingParamRegion.param.data, + numOfChunks); + } + printDebugString(DebugManager.flags.PrintBOCreateDestroyResult.get(), stdout, "GEM_CREATE_EXT has returned: %d BO-%u with size: %lu\n", ret, createExt.handle, createExt.size); handle = createExt.handle; return ret; diff --git a/shared/test/common/mocks/linux/mock_drm_allocation.h b/shared/test/common/mocks/linux/mock_drm_allocation.h index ac65756245..2775fc4030 100644 --- a/shared/test/common/mocks/linux/mock_drm_allocation.h +++ b/shared/test/common/mocks/linux/mock_drm_allocation.h @@ -36,6 +36,10 @@ class MockBufferObject : public BufferObject { TaskCountType completionValue = 0; }; + void setSize(size_t mockSize) { + size = mockSize; + } + std::optional execReturnValue; std::vector passedExecParams{}; MockBufferObject(uint32_t rootDeviceIndex, Drm *drm) : BufferObject(rootDeviceIndex, drm, CommonConstants::unsupportedPatIndex, 0, 0, 1) { diff --git a/shared/test/unit_test/os_interface/linux/drm_residency_handler_prelim_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_residency_handler_prelim_tests.cpp index cb3fde891a..b4abe36bcf 100644 --- a/shared/test/unit_test/os_interface/linux/drm_residency_handler_prelim_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_residency_handler_prelim_tests.cpp @@ -23,6 +23,7 @@ #include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/libult/linux/drm_query_mock.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" +#include "shared/test/common/mocks/linux/mock_drm_allocation.h" #include "shared/test/common/mocks/linux/mock_drm_memory_manager.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" @@ -555,6 +556,36 @@ 
TEST_F(DrmMemoryOperationsHandlerBindTest, givenMakeBOsResidentFailsThenMakeResi delete allocation; } +TEST_F(DrmMemoryOperationsHandlerBindTest, + givenDrmAllocationWithChunkingAndmakeResidentWithinOsContextCalledThenprefetchBOWithChunkingCalled) { + struct MockDrmAllocationBOsResident : public DrmAllocation { + MockDrmAllocationBOsResident(uint32_t rootDeviceIndex, AllocationType allocationType, BufferObjects &bos, void *ptrIn, uint64_t gpuAddress, size_t sizeIn, MemoryPool pool) + : DrmAllocation(rootDeviceIndex, allocationType, bos, ptrIn, gpuAddress, sizeIn, pool) { + } + }; + DebugManagerStateRestore restore; + DebugManager.flags.EnableBOChunking.set(3); + DebugManager.flags.EnableBOChunkingPreferredLocationHint.set(true); + DebugManager.flags.PrintBOPrefetchingResult.set(1); + + auto size = 4096u; + BufferObjects bos; + MockBufferObject mockBo(device->getRootDeviceIndex(), mock, 3, 0, 0, 1); + mockBo.isChunked = 1; + mockBo.setSize(1024); + bos.push_back(&mockBo); + + auto allocation = new MockDrmAllocationBOsResident(0, AllocationType::UNKNOWN, bos, nullptr, 0u, size, MemoryPool::LocalMemory); + allocation->setNumHandles(1); + allocation->storageInfo.isChunked = 1; + allocation->storageInfo.numOfChunks = 4; + allocation->storageInfo.subDeviceBitfield = 0b0011; + auto graphicsAllocation = static_cast(allocation); + + EXPECT_EQ(operationHandler->makeResidentWithinOsContext(device->getDefaultEngine().osContext, ArrayRef(&graphicsAllocation, 1), false), MemoryOperationsStatus::SUCCESS); + delete allocation; +} + TEST_F(DrmMemoryOperationsHandlerBindTest, givenDrmMemoryOperationBindWhenMakeResidentWithinOsContextEvictableAllocationThenAllocationIsNotMarkedAsAlwaysResident) { auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); diff --git a/shared/test/unit_test/os_interface/linux/drm_vm_bind_prelim_tests.cpp 
b/shared/test/unit_test/os_interface/linux/drm_vm_bind_prelim_tests.cpp index 211cdf7faa..6b4a9675b4 100644 --- a/shared/test/unit_test/os_interface/linux/drm_vm_bind_prelim_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_vm_bind_prelim_tests.cpp @@ -62,7 +62,7 @@ TEST(DrmVmBindTest, givenBoRequiringExplicitResidencyWhenBindingThenMakeResident } TEST(DrmVmBindTest, - givenBoWithChunkingRequiringExplicitResidencyWhenBindingThenMakeResidentFlagIsNotPassedAndUserFenceIsSetup) { + givenBoWithChunkingRequiringExplicitResidencyWhenBindingThenMakeResidentFlagIsPassedAndUserFenceIsSetup) { auto executionEnvironment = std::make_unique(); executionEnvironment->rootDeviceEnvironments[0]->initGmm(); executionEnvironment->initializeMemoryManager(); @@ -80,7 +80,7 @@ TEST(DrmVmBindTest, bo.bind(&osContext, vmHandleId); if (requireResidency) { - EXPECT_EQ(DrmPrelimHelper::getImmediateVmBindFlag(), drm.context.receivedVmBind->flags); + EXPECT_EQ(DrmPrelimHelper::getImmediateVmBindFlag() | DrmPrelimHelper::getMakeResidentVmBindFlag(), drm.context.receivedVmBind->flags); ASSERT_TRUE(drm.context.receivedVmBindUserFence); EXPECT_EQ(castToUint64(drm.getFenceAddr(vmHandleId)), drm.context.receivedVmBindUserFence->addr); EXPECT_EQ(drm.fenceVal[vmHandleId], drm.context.receivedVmBindUserFence->val); diff --git a/shared/test/unit_test/os_interface/linux/drm_with_prelim_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_with_prelim_tests.cpp index da94fa9bdb..b67f4019a9 100644 --- a/shared/test/unit_test/os_interface/linux/drm_with_prelim_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_with_prelim_tests.cpp @@ -35,7 +35,6 @@ class DrmPrelimMock : public DrmMock { void getPrelimVersion(std::string &prelimVersion) override { prelimVersion = "2.0"; } - int handleRemainingRequests(DrmIoctl request, void *arg) override { if (request == DrmIoctl::Query && arg != nullptr) { auto queryArg = static_cast(arg); @@ -157,7 +156,7 @@ TEST_F(IoctlHelperPrelimFixture, 
givenPrelimsWhenCreateGemExtWithChunkingThenGet MemRegionsVec memClassInstance = {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE, 0}}; ioctlHelper->createGemExt(memClassInstance, allocSize, handle, 0, {}, -1, true, getNumOfChunks); std::string output = testing::internal::GetCapturedStdout(); - std::string expectedOutput("GEM_CREATE_EXT with BOChunkingSize 65536, chunkingParamRegion.param.data 65536, numOfChunks 2\n"); + std::string expectedOutput("GEM_CREATE_EXT BO-1 with BOChunkingSize 65536, chunkingParamRegion.param.data 65536, numOfChunks 2\n"); EXPECT_EQ(expectedOutput, output); EXPECT_EQ(2u, getNumOfChunks); } @@ -460,6 +459,65 @@ TEST_F(IoctlHelperPrelimFixture, givenDrmAllocationWhenSetMemPrefetchFailsThenRe EXPECT_FALSE(allocation.setMemPrefetch(drm.get(), subDeviceIds)); } +TEST_F(IoctlHelperPrelimFixture, + givenDrmAllocationWithChunkingAndsetMemPrefetchCalledSuccessIsReturned) { + SubDeviceIdsVec subDeviceIds{0, 1}; + DebugManagerStateRestore restore; + DebugManager.flags.EnableBOChunking.set(1); + DebugManager.flags.EnableBOChunkingPreferredLocationHint.set(true); + DebugManager.flags.PrintBOPrefetchingResult.set(1); + + std::vector memRegions{ + {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0}, MemoryConstants::chunkThreshold * 4, 0}, + {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE, 0}, MemoryConstants::chunkThreshold * 4, 0}, + {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE, 1}, MemoryConstants::chunkThreshold * 4, 0}, + {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE, 2}, MemoryConstants::chunkThreshold * 4, 0}}; + drm->memoryInfo.reset(new MemoryInfo(memRegions, *drm)); + + drm->ioctlCallsCount = 0; + MockBufferObject bo(0u, drm.get(), 3, 0, 0, 1); + bo.isChunked = 1; + bo.setSize(1024); + MockDrmAllocation allocation(0u, AllocationType::BUFFER, MemoryPool::LocalMemory); + allocation.bufferObjects[0] = &bo; + allocation.storageInfo.memoryBanks = 0x5; + allocation.setNumHandles(1); + 
allocation.storageInfo.isChunked = 1; + allocation.storageInfo.numOfChunks = 4; + allocation.storageInfo.subDeviceBitfield = 0b0001; + EXPECT_TRUE(allocation.setMemPrefetch(drm.get(), subDeviceIds)); +} + +TEST_F(IoctlHelperPrelimFixture, + givenDrmAllocationWithChunkingAndsetMemPrefetchWithIoctlFailureThenFailureReturned) { + SubDeviceIdsVec subDeviceIds{0, 1}; + DebugManagerStateRestore restore; + DebugManager.flags.EnableBOChunking.set(1); + DebugManager.flags.EnableBOChunkingPreferredLocationHint.set(true); + DebugManager.flags.PrintBOPrefetchingResult.set(1); + + std::vector memRegions{ + {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_SYSTEM, 0}, MemoryConstants::chunkThreshold * 4, 0}, + {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE, 0}, MemoryConstants::chunkThreshold * 4, 0}, + {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE, 1}, MemoryConstants::chunkThreshold * 4, 0}, + {{drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE, 2}, MemoryConstants::chunkThreshold * 4, 0}}; + drm->memoryInfo.reset(new MemoryInfo(memRegions, *drm)); + + drm->ioctlCallsCount = 0; + MockBufferObject bo(0u, drm.get(), 3, 0, 0, 1); + bo.isChunked = 1; + bo.setSize(1024); + MockDrmAllocation allocation(0u, AllocationType::BUFFER, MemoryPool::LocalMemory); + allocation.bufferObjects[0] = &bo; + allocation.storageInfo.memoryBanks = 0x5; + allocation.setNumHandles(1); + allocation.storageInfo.isChunked = 1; + allocation.storageInfo.numOfChunks = 4; + allocation.storageInfo.subDeviceBitfield = 0b0001; + drm->ioctlRetVal = EINVAL; + EXPECT_FALSE(allocation.setMemPrefetch(drm.get(), subDeviceIds)); +} + TEST_F(IoctlHelperPrelimFixture, givenVariousDirectSubmissionFlagSettingWhenCreateDrmContextIsCalledThenCorrectFlagsArePassedToIoctl) { DebugManagerStateRestore stateRestore; uint32_t vmId = 0u; diff --git a/shared/test/unit_test/os_interface/linux/prelim_helper_func.cpp b/shared/test/unit_test/os_interface/linux/prelim_helper_func.cpp index 46df3d317b..fef8dcf2f0 100644 --- 
a/shared/test/unit_test/os_interface/linux/prelim_helper_func.cpp +++ b/shared/test/unit_test/os_interface/linux/prelim_helper_func.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -90,6 +90,7 @@ int handlePrelimRequests(DrmIoctl request, void *arg, int ioctlRetVal, int query } } else if (request == DrmIoctl::GemVmPrefetch) { auto vmPrefetchParams = static_cast(arg); + // Valid vm_id must be nonzero EXPECT_NE(0u, vmPrefetchParams->vm_id); } return ioctlRetVal;