Add DG1 support to OpenCL and Level Zero (2/n)

Source location of Linux kernel DRM/i915 interface headers:
https://repositories.intel.com/graphics/kernel-api/index.html

Related-To: NEO-4744

Change-Id: I08a9ab651d8594e9a04d6a83dc48682d4fa53702
Signed-off-by: Slawomir Milczarek <slawomir.milczarek@intel.com>
This commit is contained in:
Slawomir Milczarek
2020-06-22 14:34:44 +02:00
committed by sys_ocldev
parent f0874dff65
commit 7e56a8303a
12 changed files with 5709 additions and 4 deletions

View File

@@ -17,8 +17,6 @@ set(NEO_CORE_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/drm_gem_close_worker.h
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_memory_manager_allocate_in_device_pool.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_engine_mapper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/drm_engine_mapper.h
${CMAKE_CURRENT_SOURCE_DIR}/drm_neo.h
${CMAKE_CURRENT_SOURCE_DIR}/drm_neo.cpp
@@ -29,7 +27,6 @@ set(NEO_CORE_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id.h
${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id_linux.cpp
${CMAKE_CURRENT_SOURCE_DIR}/linux_inc.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_query.cpp
${CMAKE_CURRENT_SOURCE_DIR}/engine_info.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_info.h
${CMAKE_CURRENT_SOURCE_DIR}/os_context_linux.cpp
@@ -52,5 +49,19 @@ set(NEO_CORE_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/sys_calls.h
)
# Select the engine-mapper, device-pool allocation and query sources:
# the *_dg1.cpp variants when SUPPORT_DG1 is enabled and BRANCH_TYPE is empty,
# otherwise the files located under ${BRANCH_DIR_SUFFIX}.
if(SUPPORT_DG1 AND "${BRANCH_TYPE}" STREQUAL "")
list(APPEND NEO_CORE_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}/drm_engine_mapper_dg1.cpp
${CMAKE_CURRENT_SOURCE_DIR}/drm_memory_manager_allocate_in_device_pool_dg1.cpp
${CMAKE_CURRENT_SOURCE_DIR}/drm_query_dg1.cpp
)
else()
list(APPEND NEO_CORE_OS_INTERFACE_LINUX
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_engine_mapper.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_memory_manager_allocate_in_device_pool.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/drm_query.cpp
)
endif()
# Publish the assembled source list as a global property of the same name.
set_property(GLOBAL PROPERTY NEO_CORE_OS_INTERFACE_LINUX ${NEO_CORE_OS_INTERFACE_LINUX})
add_subdirectories()

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/os_interface/linux/drm_engine_mapper.h"
#include "drm/i915_drm.h"
namespace NEO {
// Translates an aub_stream engine type into the matching i915 exec-buffer engine flag.
// Only the render (RCS) and blitter (BCS) engines are mapped; any other engine type
// ends in UNRECOVERABLE_IF.
unsigned int DrmEngineMapper::engineNodeMap(aub_stream::EngineType engineType) {
    switch (engineType) {
    case aub_stream::ENGINE_RCS:
        return I915_EXEC_RENDER;
    case aub_stream::ENGINE_BCS:
        return I915_EXEC_BLT;
    default:
        break;
    }
    UNRECOVERABLE_IF(true);
}
} // namespace NEO

View File

@@ -0,0 +1,249 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/surface_format_info.h"
#include "shared/source/os_interface/linux/drm_memory_manager.h"
#include "shared/source/os_interface/linux/memory_info_impl.h"
#include "opencl/source/memory_manager/memory_banks.h"
namespace NEO {
// Creates a GEM object of `size` bytes placed in the memory region selected by
// `memoryBanks` and wraps it in a BufferObject whose address is set to `gpuAddress`.
// Returns nullptr when no memory info is available, the bank does not resolve to a
// valid region, the GEM_CREATE_EXT ioctl fails, or the BufferObject cannot be allocated.
BufferObject *createBufferObjectInMemoryRegion(Drm *drm, uint64_t gpuAddress, size_t size, uint32_t memoryBanks) {
    auto *regionsInfo = static_cast<MemoryInfoImpl *>(drm->getMemoryInfo());
    if (regionsInfo == nullptr) {
        return nullptr;
    }

    auto target = regionsInfo->getMemoryRegionClassAndInstance(memoryBanks);
    if (MemoryInfoImpl::invalidMemoryRegion() == target.memory_class) {
        return nullptr;
    }

    // Single-entry placement list handed to the kernel through the setparam extension.
    drm_i915_gem_memory_class_instance placement{};
    placement.memory_class = target.memory_class;
    placement.memory_instance = target.memory_instance;

    drm_i915_gem_create_ext_setparam placementExt{};
    placementExt.base.name = I915_GEM_CREATE_EXT_SETPARAM;
    placementExt.param.size = 1;
    placementExt.param.data = reinterpret_cast<uintptr_t>(&placement);
    placementExt.param.param = I915_OBJECT_PARAM | I915_PARAM_MEMORY_REGIONS;

    drm_i915_gem_create_ext request{};
    request.size = size;
    request.extensions = reinterpret_cast<uintptr_t>(&placementExt);

    if (drm->ioctl(DRM_IOCTL_I915_GEM_CREATE_EXT, &request) != 0) {
        return nullptr;
    }

    auto *bufferObject = new (std::nothrow) BufferObject(drm, request.handle, size);
    if (bufferObject != nullptr) {
        bufferObject->setAddress(gpuAddress);
    }
    return bufferObject;
}
// Chooses the GPU virtual address for a device-pool allocation.
//  - SVM_GPU: the host pointer value is used as the GPU address; sizeAllocated becomes 0.
//  - WRITE_COMBINED: no address is chosen here (returns 0); sizeAllocated becomes 0.
//  - KERNEL_ISA / INTERNAL_HEAP: allocated from HEAP_INTERNAL_DEVICE_MEMORY.
//  - anything else: allocated from HEAP_EXTENDED when that heap has a non-zero limit and
//    the resource is not restricted to 48 bits, otherwise from HEAP_STANDARD64KB.
// Heap-allocated addresses are returned in canonized form; sizeAllocated is passed to
// heapAllocate by reference.
uint64_t getGpuAddress(GraphicsAllocation::AllocationType allocType, GfxPartition *gfxPartition, size_t &sizeAllocated, const void *hostPtr, bool resource48Bit) {
    using AllocationType = GraphicsAllocation::AllocationType;

    if (AllocationType::SVM_GPU == allocType) {
        sizeAllocated = 0;
        return reinterpret_cast<uint64_t>(hostPtr);
    }
    if (AllocationType::WRITE_COMBINED == allocType) {
        sizeAllocated = 0;
        return 0;
    }

    HeapIndex selectedHeap = HeapIndex::HEAP_STANDARD64KB;
    if (AllocationType::KERNEL_ISA == allocType || AllocationType::INTERNAL_HEAP == allocType) {
        selectedHeap = HeapIndex::HEAP_INTERNAL_DEVICE_MEMORY;
    } else if ((gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0) && !resource48Bit) {
        selectedHeap = HeapIndex::HEAP_EXTENDED;
    }
    return GmmHelper::canonize(gfxPartition->heapAllocate(selectedHeap, sizeAllocated));
}
// Creates one buffer object per memory bank of `allocation` and stores the raw pointers
// in the allocation. The objects are held by unique_ptr until every bank succeeded, so a
// failure part-way through destroys the ones already created and returns false.
// With multiStorage each bank gets its own consecutive address range starting at
// gpuAddress; otherwise every bank uses gpuAddress.
bool createDrmAllocation(Drm *drm, DrmAllocation *allocation, uint64_t gpuAddress) {
    auto &storage = allocation->storageInfo;
    std::array<std::unique_ptr<BufferObject>, EngineLimits::maxHandleCount> owned{};
    uint64_t nextAddress = gpuAddress;

    for (auto bank = 0u; bank < storage.getNumBanks(); ++bank) {
        auto bankMask = static_cast<uint32_t>(storage.getMemoryBanks());
        if (storage.getNumBanks() > 1) {
            // Multi-bank allocation: restrict this handle to its own bank bit.
            bankMask &= (1u << bank);
        }

        auto bankSize = alignUp(allocation->getGmm(bank)->gmmResourceInfo->getSizeAllocation(), MemoryConstants::pageSize64k);
        owned[bank].reset(createBufferObjectInMemoryRegion(drm, nextAddress, bankSize, bankMask));
        if (!owned[bank]) {
            return false;
        }

        allocation->getBufferObjectToModify(bank) = owned[bank].get();
        if (storage.multiStorage) {
            nextAddress += bankSize;
        }
    }

    // Every bank succeeded: give up ownership, the allocation now refers to the objects.
    for (auto &holder : owned) {
        holder.release();
    }
    return true;
}
// Tries to create the allocation described by allocationData in device-local memory.
// `status` is set to RetryInNonDevicePool when the request is not eligible for this
// pool, to Error when buffer-object creation failed, and to Success otherwise.
// Returns the new allocation (released to the caller) or nullptr.
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) {
status = AllocationStatus::RetryInNonDevicePool;
// Not eligible: local memory unsupported on this root device, system memory requested,
// a forced 32-bit allocation, or a SHARED_RESOURCE_COPY.
if (!this->localMemorySupported[allocationData.rootDeviceIndex] ||
allocationData.flags.useSystemMemory ||
(allocationData.flags.allow32Bit && this->force32bitAllocations) ||
allocationData.type == GraphicsAllocation::AllocationType::SHARED_RESOURCE_COPY) {
return nullptr;
}
std::unique_ptr<Gmm> gmm;
size_t sizeAligned = 0;
auto numHandles = allocationData.storageInfo.getNumBanks();
// Debug-only break when more than one bank is requested.
DEBUG_BREAK_IF(numHandles > 1);
if (allocationData.type == GraphicsAllocation::AllocationType::IMAGE) {
// Images: the Gmm is built from the image info and the size is 64KB-aligned.
allocationData.imgInfo->useLocalMemory = true;
gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), *allocationData.imgInfo, allocationData.storageInfo);
sizeAligned = alignUp(allocationData.imgInfo->size, MemoryConstants::pageSize64k);
} else {
bool preferRenderCompressed = (allocationData.type == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
// WRITE_COMBINED is over-allocated: size + 64KB rounded up to 2MB, plus another 2MB.
// NOTE(review): presumably this leaves room for the alignment applied to the CPU
// mapping further down - confirm.
sizeAligned = alignUp(allocationData.size + MemoryConstants::pageSize64k, 2 * MemoryConstants::megaByte) + 2 * MemoryConstants::megaByte;
} else {
sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize64k);
}
gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmClientContext(), nullptr, sizeAligned, allocationData.flags.uncacheable,
preferRenderCompressed, false, allocationData.storageInfo);
}
// sizeAllocated is passed by reference: getGpuAddress resets it to 0 for SVM_GPU and
// WRITE_COMBINED and otherwise hands it to the heap allocator.
auto sizeAllocated = sizeAligned;
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
auto gpuAddress = getGpuAddress(allocationData.type, gfxPartition, sizeAllocated, allocationData.hostPtr, allocationData.flags.resource48Bit);
auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, numHandles, allocationData.type, nullptr, nullptr, gpuAddress, sizeAligned, MemoryPool::LocalMemory);
// Ownership of the Gmm moves from the unique_ptr to the allocation as a raw pointer.
allocation->setDefaultGmm(gmm.release());
allocation->storageInfo = allocationData.storageInfo;
allocation->setFlushL3Required(allocationData.flags.flushL3);
allocation->setReservedAddressRange(reinterpret_cast<void *>(gpuAddress), sizeAllocated);
if (!createDrmAllocation(&getDrm(allocationData.rootDeviceIndex), allocation.get(), gpuAddress)) {
// Buffer-object creation failed: delete the Gmm of every bank, return the GPU
// address range to the partition and report Error.
for (auto handleId = 0u; handleId < allocationData.storageInfo.getNumBanks(); handleId++) {
delete allocation->getGmm(handleId);
}
gfxPartition->freeGpuAddressRange(GmmHelper::decanonize(gpuAddress), sizeAllocated);
status = AllocationStatus::Error;
return nullptr;
}
if (allocationData.type == GraphicsAllocation::AllocationType::WRITE_COMBINED) {
// WRITE_COMBINED: the GPU address is derived from the CPU mapping. The mapping address
// rounded down to 2MB becomes the allocation's GPU address, the distance between the
// two is stored as the allocation offset, and the buffer object is given the
// unaligned CPU address.
// NOTE(review): the result of lockResource is not checked for nullptr here.
auto cpuAddress = lockResource(allocation.get());
auto alignedCpuAddress = alignDown(cpuAddress, 2 * MemoryConstants::megaByte);
auto offset = ptrDiff(cpuAddress, alignedCpuAddress);
allocation->setAllocationOffset(offset);
allocation->setCpuPtrAndGpuAddress(cpuAddress, reinterpret_cast<uint64_t>(alignedCpuAddress));
DEBUG_BREAK_IF(allocation->storageInfo.multiStorage);
allocation->getBO()->setAddress(reinterpret_cast<uint64_t>(cpuAddress));
}
if (allocationData.flags.requiresCpuAccess) {
// CPU access requested: lock the resource and record the CPU pointer together with
// the GPU address computed above.
auto cpuAddress = lockResource(allocation.get());
allocation->setCpuPtrAndGpuAddress(cpuAddress, gpuAddress);
}
if (useInternal32BitAllocator(allocationData.type)) {
allocation->setGpuBaseAddress(GmmHelper::canonize(getInternalHeapBaseAddress(allocationData.rootDeviceIndex)));
}
status = AllocationStatus::Success;
return allocation.release();
}
// Maps the allocation's buffer object for CPU access and returns the CPU pointer, or
// nullptr when the buffer object is missing or the mapping could not be created.
// For WRITE_COMBINED allocations the start of the mapping is rounded up to a 64KB
// boundary: the bytes below that boundary are unmapped and the aligned pointer is
// recorded on the buffer object as its locked address.
void *DrmMemoryManager::lockResourceInLocalMemoryImpl(GraphicsAllocation &graphicsAllocation) {
    auto bo = static_cast<DrmAllocation &>(graphicsAllocation).getBO();
    if (graphicsAllocation.getAllocationType() != GraphicsAllocation::AllocationType::WRITE_COMBINED) {
        return lockResourceInLocalMemoryImpl(bo);
    }

    auto addr = lockResourceInLocalMemoryImpl(bo);
    if (addr == nullptr) {
        // Either bo is null or the mapping failed. Continuing would dereference a null
        // buffer object or try to trim a mapping that does not exist.
        return nullptr;
    }

    auto alignedAddr = alignUp(addr, MemoryConstants::pageSize64k);
    auto notUsedSize = ptrDiff(alignedAddr, addr);
    if (notUsedSize != 0) {
        // Drop the unaligned head of the mapping; a zero-length munmap is rejected by
        // the OS, so it is skipped when the mapping is already aligned.
        munmapFunction(addr, notUsedSize);
    }
    bo->setLockedAddress(alignedAddr);
    return bo->peekLockedAddress();
}
// Creates a write-combined CPU mapping of `bo` and records it as the buffer object's
// locked address.
// Returns the mapped address, or nullptr when `bo` is null, the MMAP_OFFSET ioctl fails,
// or the mapping itself fails.
void *DrmMemoryManager::lockResourceInLocalMemoryImpl(BufferObject *bo) {
    if (bo == nullptr) {
        return nullptr;
    }

    drm_i915_gem_mmap_offset mmapOffset = {};
    mmapOffset.handle = bo->peekHandle();
    mmapOffset.flags = I915_MMAP_OFFSET_WC;

    auto rootDeviceIndex = this->getRootDeviceIndex(bo->drm);
    if (getDrm(rootDeviceIndex).ioctl(DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmapOffset) != 0) {
        return nullptr;
    }

    auto addr = mmapFunction(nullptr, bo->peekSize(), PROT_WRITE | PROT_READ, MAP_SHARED, getDrm(rootDeviceIndex).getFileDescriptor(), mmapOffset.offset);
    if (addr == MAP_FAILED) {
        // mmap reports failure with MAP_FAILED, not nullptr. Normalise it so the failed
        // mapping is neither stored as the locked address nor handed to callers, which
        // test the result against nullptr.
        addr = nullptr;
    }
    DEBUG_BREAK_IF(addr == nullptr);
    bo->setLockedAddress(addr);
    return bo->peekLockedAddress();
}
// Unmaps the CPU mapping recorded on `bo` (locked address, buffer-object size) and
// clears the locked address. A null `bo` is ignored.
void DrmMemoryManager::unlockResourceInLocalMemoryImpl(BufferObject *bo) {
    if (bo != nullptr) {
        auto unmapStatus = munmapFunction(bo->peekLockedAddress(), bo->peekSize());
        DEBUG_BREAK_IF(unmapStatus != 0);
        UNUSED_VARIABLE(unmapStatus);
        bo->setLockedAddress(nullptr);
    }
}
// Copies sizeToCopy bytes from memoryToCopy into the allocation.
// When the allocation has an underlying CPU buffer the generic MemoryManager
// implementation is used. Otherwise every bank's buffer object is mapped in turn, the
// data is copied into it, and it is unmapped again.
// Returns false when a buffer object cannot be mapped or when the data does not fit
// into the allocation's underlying buffer size; true otherwise.
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
    if (graphicsAllocation->getUnderlyingBuffer()) {
        return MemoryManager::copyMemoryToAllocation(graphicsAllocation, memoryToCopy, sizeToCopy);
    }

    // memcpy_s copies nothing when the source exceeds the destination capacity; report
    // that as a failure instead of returning success with the data silently dropped.
    if (sizeToCopy > graphicsAllocation->getUnderlyingBufferSize()) {
        return false;
    }

    auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
    for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getNumBanks(); handleId++) {
        auto ptr = lockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
        if (!ptr) {
            return false;
        }
        memcpy_s(ptr, graphicsAllocation->getUnderlyingBufferSize(), memoryToCopy, sizeToCopy);
        this->unlockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
    }
    return true;
}
// Returns the size reported by the memory info for MemoryBanks::Bank0 on the given
// root device, or 0 when no memory info is available.
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
    uint64_t localMemorySize = 0;
    auto *regionsInfo = static_cast<MemoryInfoImpl *>(getDrm(rootDeviceIndex).getMemoryInfo());
    if (regionsInfo != nullptr) {
        localMemorySize = regionsInfo->getMemoryRegionSize(MemoryBanks::Bank0);
    }
    return localMemorySize;
}
} // namespace NEO

View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/linux/drm_engine_mapper.h"
#include "shared/source/os_interface/linux/memory_info_impl.h"
#include "drm_neo.h"
#include <fstream>
namespace NEO {
// Reads the maximum GPU frequency from "<sysfs pci path>/gt_max_freq_mhz" into
// maxGpuFrequency. The output is reset to 0 first.
// Returns -1 when the file cannot be opened, 0 otherwise. hwInfo is not used.
int Drm::getMaxGpuFrequency(HardwareInfo &hwInfo, int &maxGpuFrequency) {
    maxGpuFrequency = 0;

    std::string frequencyFile = getSysFsPciPath();
    frequencyFile.append("/gt_max_freq_mhz");

    std::ifstream frequencyStream(frequencyFile.c_str(), std::ifstream::in);
    if (!frequencyStream) {
        return -1;
    }

    frequencyStream >> maxGpuFrequency;
    frequencyStream.close();
    return 0;
}
// No engine information is queried in this implementation; it always reports success.
bool Drm::queryEngineInfo() {
return true;
}
// Issues the DRM_I915_QUERY_MEMORY_REGIONS query and, when it yields data, stores a
// MemoryInfoImpl built from the returned region array in this->memoryInfo.
// Returns true when the query produced data, false otherwise.
bool Drm::queryMemoryInfo() {
    int length = 0;
    auto rawQueryData = this->query(DRM_I915_QUERY_MEMORY_REGIONS, length);
    auto *regionsQuery = reinterpret_cast<drm_i915_query_memory_regions *>(rawQueryData.get());
    if (regionsQuery == nullptr) {
        return false;
    }

    this->memoryInfo.reset(new MemoryInfoImpl(regionsQuery->regions, regionsQuery->num_regions));
    return true;
}
// Returns the value DrmEngineMapper::engineNodeMap produces for engineType.
// drmContextId and deviceIndex are not used by this implementation.
unsigned int Drm::bindDrmContext(uint32_t drmContextId, uint32_t deviceIndex, aub_stream::EngineType engineType) {
return DrmEngineMapper::engineNodeMap(engineType);
}
} // namespace NEO

View File

@@ -0,0 +1,51 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/os_interface/linux/drm_neo.h"
#include "shared/source/os_interface/linux/memory_info.h"
#include "drm/i915_drm.h"
#include <cstddef>
#include <cstdint>
#include <vector>
namespace NEO {
// MemoryInfo implementation backed by a copy of the region array returned by the i915
// memory-regions query.
// A memory bank value is translated to an index into `regions`: bank value 0 selects
// index 0, any other value selects log2(value) + 1.
struct MemoryInfoImpl : public MemoryInfo {
    ~MemoryInfoImpl() override = default;

    // Copies `count` region descriptors starting at `regionInfo`.
    MemoryInfoImpl(const drm_i915_memory_region_info *regionInfo, size_t count) : regions(regionInfo, regionInfo + count) {
    }

    // Returns the class/instance pair of the region selected by memoryBank, or a pair
    // with both fields set to invalidMemoryRegion() when the index is out of range.
    drm_i915_gem_memory_class_instance getMemoryRegionClassAndInstance(uint32_t memoryBank) {
        auto regionIndex = (memoryBank == 0) ? 0 : Math::log2(memoryBank) + 1;
        if (regionIndex >= regions.size()) {
            return {invalidMemoryRegion(), invalidMemoryRegion()};
        }
        return regions[regionIndex].region;
    }

    // Returns the probed size of the region selected by memoryBank, or 0 when the index
    // is out of range.
    size_t getMemoryRegionSize(uint32_t memoryBank) {
        auto regionIndex = (memoryBank == 0) ? 0 : Math::log2(memoryBank) + 1;
        if (regionIndex >= regions.size()) {
            return 0;
        }
        return regions[regionIndex].probed_size;
    }

    // Sentinel (all bits set in 16 bits) marking an unresolved region class/instance.
    static constexpr uint16_t invalidMemoryRegion() {
        return static_cast<uint16_t>(-1);
    }

    std::vector<drm_i915_memory_region_info> regions;
};
} // namespace NEO