diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 9671363ddd..cbc6243190 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -104,8 +104,14 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin } } else { + const bool isShareable = (eventPoolFlags & ZE_EVENT_POOL_FLAG_IPC); + if (isShareable) { + allocationType = NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER; + } + NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), alignedSize, allocationType, systemMemoryBitfield}; allocationProperties.alignment = eventAlignment; + allocationProperties.flags.shareable = isShareable; std::vector rootDeviceIndicesVector = {rootDeviceIndices.begin(), rootDeviceIndices.end()}; eventPoolPtr = driver->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 1f5014dccd..6b028c1163 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -258,7 +258,7 @@ TEST_F(EventPoolIPCHandleTests, whenGettingIpcHandleForEventPoolThenHandleAndNum ze_event_pool_desc_t eventPoolDesc = { ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, nullptr, - ZE_EVENT_POOL_FLAG_HOST_VISIBLE, + ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_IPC, numEvents}; auto deviceHandle = device->toHandle(); diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 2647c01f08..81f7ed9122 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -1240,6 +1240,34 @@ TEST_F(DrmMemoryManagerTest, GivenShareableEnabledWhenAskedToCreateGraphicsAlloc memoryManager->freeGraphicsMemory(allocation); } +TEST_F(DrmMemoryManagerTest, GivenAllocationTypeThatRequiresCpuAccessForKmdAllocationThenLockTheResourceIsCalled) { + struct DrmMemoryManagerToTestLockResource : public DrmMemoryManager { + using DrmMemoryManager::allocateMemoryByKMD; + + DrmMemoryManagerToTestLockResource(ExecutionEnvironment &executionEnvironment) + : DrmMemoryManager(gemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) { + } + void *lockResourceImpl(GraphicsAllocation &allocation) override { + return reinterpret_cast(0xDEADBEEF); + } + }; + + DrmMemoryManagerToTestLockResource drmMemoryManager(*executionEnvironment); + + mock->ioctl_expected.gemWait = 1; + mock->ioctl_expected.gemCreate = 1; + mock->ioctl_expected.gemClose = 1; + + allocationData.type = AllocationType::TIMESTAMP_PACKET_TAG_BUFFER; + + auto allocation = drmMemoryManager.allocateMemoryByKMD(allocationData); + EXPECT_NE(nullptr, allocation); + EXPECT_NE(0u, allocation->getGpuAddress()); + EXPECT_EQ(0xDEADBEEF, reinterpret_cast(allocation->getUnderlyingBuffer())); + + memoryManager->freeGraphicsMemory(allocation); +} + TEST_F(DrmMemoryManagerTest, GivenMisalignedHostPtrAndMultiplePagesSizeWhenAskedForGraphicsAllocationThenItContainsAllFragmentsWithProperGpuAdrresses) { mock->ioctl_expected.gemUserptr = 3; mock->ioctl_expected.gemWait = 3; diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index c3069cfb3b..c98b6390b3 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -389,3 +389,4 @@ AccessCountersGranularity = -1 OverridePatIndex = -1 UseTileMemoryBankInVirtualMemoryCreation = -1 DisableScratchPages = 0 +SetVmAdviseAtomicAttribute = -1 diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 947d2b8783..b8d69d9c67 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -192,6 +192,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersTrigger, -1, "-1: default - disabl DECLARE_DEBUG_VARIABLE(int32_t, AccessCountersGranularity, -1, "-1: default - ACG_2MB, >= 0: granularites - 0: ACG_128K, 1: ACG_2M, 2: ACG_16M, 3: ACG_16M") DECLARE_DEBUG_VARIABLE(int32_t, OverridePatIndex, -1, "-1: default, >=0: PatIndex to override") DECLARE_DEBUG_VARIABLE(int32_t, UseTileMemoryBankInVirtualMemoryCreation, -1, "-1: default - on, 0: do not assign tile memory bank to virtual memory space, 1: assign tile memory bank to virtual memory space") +DECLARE_DEBUG_VARIABLE(int32_t, SetVmAdviseAtomicAttribute, -1, "-1: default - atomic system, 0: atomic none, 1: atomic device, 2: atomic system)") DECLARE_DEBUG_VARIABLE(bool, DisableScratchPages, false, "Disable scratch pages during VM creations") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git a/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl b/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl index 0d1ad58075..8f71c4727a 100644 --- a/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl +++ b/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl @@ -5062,7 +5062,7 @@ typedef struct tagMI_STORE_DATA_IMM { ADDRESS_ALIGN_SIZE = 0x4, } ADDRESS; inline void setAddress(const uint64_t value) { - UNRECOVERABLE_IF(value > 0x3fffffffffffffffL); + UNRECOVERABLE_IF((value >> ADDRESS_BIT_SHIFT) > 0x3fffffffffffffffL); TheStructure.Common.Address = value >> ADDRESS_BIT_SHIFT; } inline uint64_t getAddress() const { diff --git a/shared/source/os_interface/linux/CMakeLists.txt b/shared/source/os_interface/linux/CMakeLists.txt index 75925f7414..0e7d2f7ba2 100644 --- a/shared/source/os_interface/linux/CMakeLists.txt +++ b/shared/source/os_interface/linux/CMakeLists.txt @@ -37,11 +37,10 @@ set(NEO_CORE_OS_INTERFACE_LINUX ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_bind.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_bind.h + ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_create.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_default.cpp ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_default.h ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_create_multi_host_allocation.cpp - ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_memory_manager_local_memory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_operations_handler_create.cpp ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}drm_query.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_info_config_drm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id.h diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index 7da9420814..33c62a00cf 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -21,6 +21,8 @@ #include "shared/source/helpers/string.h" #include "shared/source/helpers/surface_format_info.h" #include "shared/source/memory_manager/host_ptr_manager.h" +#include "shared/source/memory_manager/memory_banks.h" +#include "shared/source/memory_manager/memory_pool.h" #include "shared/source/memory_manager/residency.h" #include "shared/source/os_interface/linux/allocator_helper.h" #include "shared/source/os_interface/linux/drm_memory_operations_handler.h" @@ -504,6 +506,12 @@ GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData & allocation->setDefaultGmm(gmm.release()); allocation->setReservedAddressRange(reinterpret_cast(gpuRange), bufferSize); + + if (GraphicsAllocation::isCpuAccessRequired(allocationData.type)) { + auto cpuAddress = lockResource(allocation); + allocation->setCpuPtrAndGpuAddress(cpuAddress, gpuRange); + } + bo.release(); return allocation; } @@ -1616,4 +1624,160 @@ void *DrmMemoryManager::lockResourceInLocalMemoryImpl(BufferObject *bo) { return bo->peekLockedAddress(); } +void createMemoryRegionsForSharedAllocation(const HardwareInfo &hwInfo, MemoryInfo &memoryInfo, const AllocationData &allocationData, MemRegionsVec &memRegions) { + auto memoryBanks = allocationData.storageInfo.memoryBanks; + + if (allocationData.usmInitialPlacement == GraphicsAllocation::UsmInitialPlacement::CPU) { + //System memory region + auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(0u, hwInfo); + memRegions.push_back(regionClassAndInstance); + } + + //All local memory regions + size_t currentBank = 0; + size_t i = 0; + + while (i < memoryBanks.count()) { + if (memoryBanks.test(currentBank)) { + auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(1u << currentBank, hwInfo); + memRegions.push_back(regionClassAndInstance); + i++; + } + currentBank++; + } + + if (allocationData.usmInitialPlacement == GraphicsAllocation::UsmInitialPlacement::GPU) { + //System memory region + auto regionClassAndInstance = memoryInfo.getMemoryRegionClassAndInstance(0u, hwInfo); + memRegions.push_back(regionClassAndInstance); + } +} + +GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const AllocationData &allocationData) { + auto &drm = this->getDrm(allocationData.rootDeviceIndex); + + const auto vmAdviseAttribute = drm.getIoctlHelper()->getVmAdviseAtomicAttribute(); + if (vmAdviseAttribute == 0) { + return nullptr; + } + + auto memoryInfo = drm.getMemoryInfo(); + if (!memoryInfo) { + return nullptr; + } + + auto size = allocationData.size; + auto alignment = allocationData.alignment; + + auto pHwInfo = drm.getRootDeviceEnvironment().getHardwareInfo(); + + MemRegionsVec memRegions; + createMemoryRegionsForSharedAllocation(*pHwInfo, *memoryInfo, allocationData, memRegions); + + uint32_t handle = 0; + auto ret = memoryInfo->createGemExt(&drm, memRegions, size, handle); + + if (ret) { + return nullptr; + } + + std::unique_ptr bo(new BufferObject(&drm, handle, size, maxOsContextCount)); + + if (!drm.getIoctlHelper()->setVmBoAdvise(&drm, bo->peekHandle(), vmAdviseAttribute, nullptr)) { + return nullptr; + } + + uint64_t offset = 0; + if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, I915_MMAP_OFFSET_WB, offset)) { + return nullptr; + } + + auto totalSizeToAlloc = size + alignment; + auto cpuPointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + auto cpuBasePointer = cpuPointer; + cpuPointer = alignUp(cpuPointer, alignment); + + this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, drm.getFileDescriptor(), static_cast(offset)); + + bo->setAddress(reinterpret_cast(cpuPointer)); + + auto allocation = std::make_unique(allocationData.rootDeviceIndex, allocationData.type, bo.get(), cpuPointer, bo->peekAddress(), size, MemoryPool::System4KBPages); + allocation->setMmapPtr(cpuBasePointer); + allocation->setMmapSize(totalSizeToAlloc); + if (!allocation->setCacheRegion(&this->getDrm(allocationData.rootDeviceIndex), static_cast(allocationData.cacheRegion))) { + this->munmapFunction(cpuPointer, totalSizeToAlloc); + return nullptr; + } + + bo.release(); + + return allocation.release(); +} + +DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool hasMappedPtr) { + drm_prime_handle openFd = {0, 0, 0}; + openFd.fd = handle; + + auto ret = this->getDrm(properties.rootDeviceIndex).ioctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &openFd); + if (ret != 0) { + int err = this->getDrm(properties.rootDeviceIndex).getErrno(); + PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(PRIME_FD_TO_HANDLE) failed with %d. errno=%d(%s)\n", ret, err, strerror(err)); + DEBUG_BREAK_IF(ret != 0); + return nullptr; + } + + if (hasMappedPtr) { + auto bo = new BufferObject(&getDrm(properties.rootDeviceIndex), openFd.handle, properties.size, maxOsContextCount); + bo->setAddress(properties.gpuAddress); + + return new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, reinterpret_cast(bo->peekAddress()), bo->peekSize(), + handle, MemoryPool::SystemCpuInaccessible); + } + + auto boHandle = openFd.handle; + auto bo = findAndReferenceSharedBufferObject(boHandle, properties.rootDeviceIndex); + + void *cpuPointer = nullptr; + size_t size = lseekFunction(handle, 0, SEEK_END); + + if (bo == nullptr) { + bo = new BufferObject(&getDrm(properties.rootDeviceIndex), boHandle, size, maxOsContextCount); + cpuPointer = this->mmapFunction(0, size, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + bo->setAddress(reinterpret_cast(cpuPointer)); + + uint64_t offset = 0; + if (!retrieveMmapOffsetForBufferObject(properties.rootDeviceIndex, *bo, I915_MMAP_OFFSET_WB, offset)) { + this->munmapFunction(cpuPointer, size); + delete bo; + return nullptr; + } + + [[maybe_unused]] auto retPtr = this->mmapFunction(cpuPointer, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, getDrm(properties.rootDeviceIndex).getFileDescriptor(), static_cast(offset)); + DEBUG_BREAK_IF(retPtr != cpuPointer); + + AllocationData allocationData = {}; + allocationData.rootDeviceIndex = properties.rootDeviceIndex; + allocationData.size = size; + emitPinningRequest(bo, allocationData); + + bo->setUnmapSize(size); + bo->setRootDeviceIndex(properties.rootDeviceIndex); + + pushSharedBufferObject(bo); + + DrmAllocation *drmAllocation = nullptr; + drmAllocation = new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, cpuPointer, bo->peekAddress(), bo->peekSize(), MemoryPool::System4KBPages); + drmAllocation->setMmapPtr(cpuPointer); + drmAllocation->setMmapSize(size); + drmAllocation->setReservedAddressRange(reinterpret_cast(cpuPointer), size); + drmAllocation->setCacheRegion(&this->getDrm(properties.rootDeviceIndex), static_cast(properties.cacheRegion)); + + return drmAllocation; + } + + return new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, reinterpret_cast(bo->peekAddress()), bo->peekSize(), + handle, MemoryPool::SystemCpuInaccessible); +} + } // namespace NEO diff --git a/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp b/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp deleted file mode 100644 index 12e48fd14b..0000000000 --- a/shared/source/os_interface/linux/drm_memory_manager_local_memory.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2019-2022 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/helpers/basic_math.h" -#include "shared/source/os_interface/linux/drm_memory_manager.h" - -namespace NEO { - -DrmAllocation *DrmMemoryManager::createUSMHostAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool hasMappedPtr) { - drm_prime_handle openFd = {0, 0, 0}; - openFd.fd = handle; - - auto ret = this->getDrm(properties.rootDeviceIndex).ioctl(DRM_IOCTL_PRIME_FD_TO_HANDLE, &openFd); - - if (ret != 0) { - int err = this->getDrm(properties.rootDeviceIndex).getErrno(); - PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "ioctl(PRIME_FD_TO_HANDLE) failed with %d. errno=%d(%s)\n", ret, err, strerror(err)); - DEBUG_BREAK_IF(ret != 0); - return nullptr; - } - - auto bo = new BufferObject(&getDrm(properties.rootDeviceIndex), openFd.handle, properties.size, maxOsContextCount); - bo->setAddress(properties.gpuAddress); - - return new DrmAllocation(properties.rootDeviceIndex, properties.allocationType, bo, reinterpret_cast(bo->peekAddress()), bo->peekSize(), - handle, MemoryPool::SystemCpuInaccessible); -} - -GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const AllocationData &allocationData) { - return nullptr; -} - -} // namespace NEO diff --git a/shared/source/os_interface/linux/ioctl_helper.h b/shared/source/os_interface/linux/ioctl_helper.h index 8e048cf17b..b7cd8771b5 100644 --- a/shared/source/os_interface/linux/ioctl_helper.h +++ b/shared/source/os_interface/linux/ioctl_helper.h @@ -110,6 +110,7 @@ class IoctlHelper { virtual void fillVmBindExtSyncFence(const std::unique_ptr &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) = 0; virtual std::optional getCopyClassSaturatePCIECapability() = 0; virtual std::optional getCopyClassSaturateLinkCapability() = 0; + virtual uint32_t getVmAdviseAtomicAttribute() = 0; virtual int vmBind(Drm *drm, const VmBindParams &vmBindParams) = 0; virtual int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) = 0; virtual bool getEuStallProperties(std::array &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) = 0; @@ -155,6 +156,7 @@ class IoctlHelperUpstream : public IoctlHelper { void fillVmBindExtSyncFence(const std::unique_ptr &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) override; std::optional getCopyClassSaturatePCIECapability() override; std::optional getCopyClassSaturateLinkCapability() override; + uint32_t getVmAdviseAtomicAttribute() override; int vmBind(Drm *drm, const VmBindParams &vmBindParams) override; int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) override; bool getEuStallProperties(std::array &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) override; @@ -213,6 +215,7 @@ class IoctlHelperPrelim20 : public IoctlHelper { void fillVmBindExtSyncFence(const std::unique_ptr &vmBindExtSyncFence, uint64_t fenceAddress, uint64_t fenceValue, uint64_t nextExtension) override; std::optional getCopyClassSaturatePCIECapability() override; std::optional getCopyClassSaturateLinkCapability() override; + uint32_t getVmAdviseAtomicAttribute() override; int vmBind(Drm *drm, const VmBindParams &vmBindParams) override; int vmUnbind(Drm *drm, const VmBindParams &vmBindParams) override; bool getEuStallProperties(std::array &properties, uint64_t dssBufferSize, uint64_t samplingRate, uint64_t pollPeriod, uint64_t engineInstance) override; diff --git a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp index 6ccb71523f..4df4cd5765 100644 --- a/shared/source/os_interface/linux/ioctl_helper_prelim.cpp +++ b/shared/source/os_interface/linux/ioctl_helper_prelim.cpp @@ -451,6 +451,17 @@ std::optional IoctlHelperPrelim20::getCopyClassSaturateLinkCapability( return PRELIM_I915_COPY_CLASS_CAP_SATURATE_LINK; } +uint32_t IoctlHelperPrelim20::getVmAdviseAtomicAttribute() { + switch (NEO::DebugManager.flags.SetVmAdviseAtomicAttribute.get()) { + case 0: + return PRELIM_I915_VM_ADVISE_ATOMIC_NONE; + case 1: + return PRELIM_I915_VM_ADVISE_ATOMIC_DEVICE; + default: + return PRELIM_I915_VM_ADVISE_ATOMIC_SYSTEM; + } +} + prelim_drm_i915_gem_vm_bind translateVmBindParamsToPrelimStruct(const VmBindParams &vmBindParams) { prelim_drm_i915_gem_vm_bind vmBind{}; vmBind.vm_id = vmBindParams.vmId; diff --git a/shared/source/os_interface/linux/ioctl_helper_upstream.cpp b/shared/source/os_interface/linux/ioctl_helper_upstream.cpp index a7156ec4e8..bbac84c714 100644 --- a/shared/source/os_interface/linux/ioctl_helper_upstream.cpp +++ b/shared/source/os_interface/linux/ioctl_helper_upstream.cpp @@ -204,6 +204,10 @@ std::optional IoctlHelperUpstream::getCopyClassSaturateLinkCapability( return std::nullopt; } +uint32_t IoctlHelperUpstream::getVmAdviseAtomicAttribute() { + return 0; +} + int IoctlHelperUpstream::vmBind(Drm *drm, const VmBindParams &vmBindParams) { return 0; }