mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Reorganization directory structure [1/n]
Change-Id: Id1a94577437a4826a32411869f516fec20314ec0
This commit is contained in:
34
opencl/source/mem_obj/CMakeLists.txt
Normal file
34
opencl/source/mem_obj/CMakeLists.txt
Normal file
@@ -0,0 +1,34 @@
|
||||
#
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#

# Source list for the OpenCL memory-object runtime code (buffers, images,
# pipes and the map-operations handler).
# ${BRANCH_DIR_SUFFIX} allows branch-specific directories to override
# selected files (mem_obj_helper.cpp and the definitions/*.inl extensions).
set(RUNTIME_SRCS_MEM_OBJ
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer.h
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_base.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_bdw_plus.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_factory_init.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image.h
    ${CMAKE_CURRENT_SOURCE_DIR}/image.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/image_tgllp_plus.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/image_factory_init.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj.h
    ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mem_obj_helper.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
    ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/buffer_ext.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/image_ext.inl
)

# Compile the sources into the main static library and also publish the list
# as a global property for consumers (e.g. test targets) to reuse.
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_MEM_OBJ ${RUNTIME_SRCS_MEM_OBJ})
add_subdirectories()
|
||||
649
opencl/source/mem_obj/buffer.cpp
Normal file
649
opencl/source/mem_obj/buffer.cpp
Normal file
@@ -0,0 +1,649 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/buffer.h"
|
||||
|
||||
#include "core/command_stream/command_stream_receiver.h"
|
||||
#include "core/debug_settings/debug_settings_manager.h"
|
||||
#include "core/execution_environment/root_device_environment.h"
|
||||
#include "core/gmm_helper/gmm.h"
|
||||
#include "core/gmm_helper/gmm_helper.h"
|
||||
#include "core/helpers/aligned_memory.h"
|
||||
#include "core/helpers/hw_helper.h"
|
||||
#include "core/helpers/hw_info.h"
|
||||
#include "core/helpers/ptr_math.h"
|
||||
#include "core/helpers/string.h"
|
||||
#include "core/helpers/timestamp_packet.h"
|
||||
#include "core/memory_manager/host_ptr_manager.h"
|
||||
#include "core/memory_manager/memory_manager.h"
|
||||
#include "core/memory_manager/memory_operations_handler.h"
|
||||
#include "core/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "command_queue/command_queue.h"
|
||||
#include "context/context.h"
|
||||
#include "device/cl_device.h"
|
||||
#include "helpers/memory_properties_flags_helpers.h"
|
||||
#include "helpers/validators.h"
|
||||
#include "mem_obj/mem_obj_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Factory table indexed by GFX core family; zero-initialized here and
// populated elsewhere (per-platform factory init) — TODO confirm the exact
// initializer translation unit.
BufferFuncs bufferFactory[IGFX_MAX_CORE] = {};
|
||||
|
||||
// Wraps an already-made allocation decision as a cl_mem buffer object.
// All policy (zero-copy, SVM host pointer, allocation ownership) was decided
// by the caller; this constructor only forwards state to MemObj, stamps the
// validation magic and records the minimum host-pointer size.
Buffer::Buffer(Context *context,
               MemoryPropertiesFlags memoryProperties,
               cl_mem_flags flags,
               cl_mem_flags_intel flagsIntel,
               size_t size,
               void *memoryStorage,
               void *hostPtr,
               GraphicsAllocation *gfxAllocation,
               bool zeroCopy,
               bool isHostPtrSVM,
               bool isObjectRedescribed)
    : MemObj(context,
             CL_MEM_OBJECT_BUFFER,
             memoryProperties,
             flags,
             flagsIntel,
             size,
             memoryStorage,
             hostPtr,
             gfxAllocation,
             zeroCopy,
             isHostPtrSVM,
             isObjectRedescribed) {
    magic = objectMagic;    // marks this handle as a live Buffer for validation checks
    setHostPtrMinSize(size); // a user-supplied host pointer must cover at least 'size' bytes
}
|
||||
|
||||
// Default constructor: an empty buffer bound to no context and no allocation.
// Presumably used by subclasses/tests that fill state in later — TODO confirm callers.
Buffer::Buffer() : MemObj(nullptr, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 0, nullptr, nullptr, nullptr, false, false, false) {
}
|
||||
|
||||
// Defaulted destructor, defined out-of-line to keep it out of the header.
Buffer::~Buffer() = default;
|
||||
|
||||
bool Buffer::isSubBuffer() {
|
||||
return this->associatedMemObject != nullptr;
|
||||
}
|
||||
|
||||
// Checks whether 'offset' is an acceptable origin for a sub-buffer of this buffer.
bool Buffer::isValidSubBufferOffset(size_t offset) {
    if (this->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
        // Spec: "origin value is aligned to the CL_DEVICE_MEM_BASE_ADDR_ALIGN value"
        // (the device reports that value in bits, hence the /8).
        const auto baseAlignInBytes = this->getContext()->getDevice(0)->getDeviceInfo().memBaseAddressAlign / 8u;
        if (!isAligned(offset, baseAlignInBytes)) {
            return false;
        }
    }
    const cl_uint addressAlignBits = 32; // i.e. 4-byte alignment for all other buffers
    return (offset & (addressAlignBits / 8 - 1)) == 0;
}
|
||||
|
||||
// Validates clCreateBuffer-style inputs and, if everything checks out, creates
// the buffer into 'buffer'. On failure 'retVal' holds the CL error code and
// 'buffer' is left untouched. Checks run in spec order: context, flags, size,
// host pointer.
void Buffer::validateInputAndCreateBuffer(cl_context &context,
                                          MemoryPropertiesFlags memoryProperties,
                                          cl_mem_flags flags,
                                          cl_mem_flags_intel flagsIntel,
                                          size_t size,
                                          void *hostPtr,
                                          cl_int &retVal,
                                          cl_mem &buffer) {
    Context *pContext = nullptr;
    retVal = validateObjects(WithCastToInternal(context, &pContext));
    if (retVal != CL_SUCCESS) {
        return;
    }

    if (!MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel)) {
        retVal = CL_INVALID_VALUE;
        return;
    }

    auto pDevice = pContext->getDevice(0);
    // The unrestricted-size extension flag may arrive via either flags field.
    bool allowCreateBuffersWithUnrestrictedSize = isValueSet(flags, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) ||
                                                  isValueSet(flagsIntel, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL);

    if (size == 0 || (size > pDevice->getHardwareCapabilities().maxMemAllocSize && !allowCreateBuffersWithUnrestrictedSize)) {
        retVal = CL_INVALID_BUFFER_SIZE;
        return;
    }

    /* Check the host ptr and data */
    // hostPtr must be non-null exactly when COPY/USE_HOST_PTR is requested;
    // the == comparison rejects both mismatched combinations at once.
    bool expectHostPtr = (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) != 0;
    if ((hostPtr == nullptr) == expectHostPtr) {
        retVal = CL_INVALID_HOST_PTR;
        return;
    }

    // create the buffer
    buffer = create(pContext, memoryProperties, flags, flagsIntel, size, hostPtr, retVal);
}
|
||||
|
||||
// Convenience overload: derives MemoryPropertiesFlags from the plain
// cl_mem_flags and forwards to the full factory.
Buffer *Buffer::create(Context *context,
                       cl_mem_flags flags,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    auto memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0);
    return create(context, memoryProperties, flags, 0, size, hostPtr, errcodeRet);
}
|
||||
|
||||
// Main buffer factory. Decides allocation type and zero-copy/copy strategy,
// obtains (or adopts) the graphics allocation, constructs the HW-specific
// Buffer, and performs the initial host-to-buffer copy when required.
// On failure returns nullptr with 'errcodeRet' set; the allocation is freed.
// NOTE: the flag variables below form an order-dependent state machine —
// later sections deliberately override earlier decisions.
Buffer *Buffer::create(Context *context,
                       MemoryPropertiesFlags memoryProperties,
                       cl_mem_flags flags,
                       cl_mem_flags_intel flagsIntel,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    Buffer *pBuffer = nullptr;
    errcodeRet = CL_SUCCESS;

    GraphicsAllocation *memory = nullptr;
    GraphicsAllocation *mapAllocation = nullptr;
    bool zeroCopyAllowed = true;
    bool isHostPtrSVM = false;

    bool alignementSatisfied = true;
    bool allocateMemory = true;
    bool copyMemoryFromHostPtr = false;
    auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex();
    MemoryManager *memoryManager = context->getMemoryManager();
    UNRECOVERABLE_IF(!memoryManager);

    // Initial allocation-type pick based on compression support/preference,
    // local-memory availability and the requested memory properties.
    GraphicsAllocation::AllocationType allocationType = getGraphicsAllocationType(
        memoryProperties,
        *context,
        HwHelper::renderCompressedBuffersSupported(context->getDevice(0)->getHardwareInfo()),
        memoryManager->isLocalMemorySupported(rootDeviceIndex),
        HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(context->getDevice(0)->getHardwareInfo(), size));

    // Validates hostPtr/flags combinations; also sets alignementSatisfied and
    // copyMemoryFromHostPtr as side effects.
    checkMemory(memoryProperties, size, hostPtr, errcodeRet, alignementSatisfied, copyMemoryFromHostPtr, memoryManager);

    if (errcodeRet != CL_SUCCESS) {
        return nullptr;
    }

    // Compressed buffers can never be zero-copy: the storage layout differs
    // from the host view.
    if (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
        zeroCopyAllowed = false;
        allocateMemory = true;
    }

    if (allocationType == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
        if (memoryProperties.flags.useHostPtr) {
            // USE_HOST_PTR on host memory: reuse the user pointer when it meets
            // alignment restrictions, otherwise allocate and fall back to a copy.
            if (alignementSatisfied) {
                allocateMemory = false;
                zeroCopyAllowed = true;
            } else {
                zeroCopyAllowed = false;
                allocateMemory = true;
            }
        }
    }

    if (memoryProperties.flags.useHostPtr) {
        if (DebugManager.flags.DisableZeroCopyForUseHostPtr.get()) {
            zeroCopyAllowed = false;
            allocateMemory = true;
        }

        // If the host pointer is an SVM allocation, adopt its existing GPU
        // allocation instead of creating a new one.
        auto svmManager = context->getSVMAllocsManager();
        if (svmManager) {
            auto svmData = svmManager->getSVMAlloc(hostPtr);
            if (svmData) {
                memory = svmData->gpuAllocation;
                allocationType = memory->getAllocationType();
                isHostPtrSVM = true;
                zeroCopyAllowed = memory->getAllocationType() == GraphicsAllocation::AllocationType::SVM_ZERO_COPY;
                copyMemoryFromHostPtr = false;
                allocateMemory = false;
                mapAllocation = svmData->cpuAllocation;
            }
        }
    }

    if (context->isSharedContext) {
        zeroCopyAllowed = true;
        copyMemoryFromHostPtr = false;
        allocateMemory = false;
    }

    if (hostPtr && context->isProvidingPerformanceHints()) {
        if (zeroCopyAllowed) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size);
        } else {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, hostPtr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
        }
    }

    if (DebugManager.flags.DisableZeroCopyForBuffers.get()) {
        zeroCopyAllowed = false;
    }

    if (allocateMemory && context->isProvidingPerformanceHints()) {
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
    }

    // No SVM allocation adopted above: obtain a new allocation (or register
    // the user host pointer when allocateMemory is false).
    if (!memory) {
        AllocationProperties allocProperties = MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryProperties, allocateMemory, size, allocationType, context->areMultiStorageAllocationsPreferred());
        memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr);
    }

    if (allocateMemory && memory && MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) {
        memoryManager->addAllocationToHostPtrManager(memory);
    }

    //if allocation failed for CL_MEM_USE_HOST_PTR case retry with non zero copy path
    if (memoryProperties.flags.useHostPtr && !memory && Buffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)) {
        allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
        zeroCopyAllowed = false;
        copyMemoryFromHostPtr = true;
        AllocationProperties allocProperties = MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryProperties, true, size, allocationType, context->areMultiStorageAllocationsPreferred());
        memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
    }

    if (!memory) {
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        return nullptr;
    }

    // Allocation landed outside system memory: zero-copy is impossible and a
    // host pointer (non-SVM) must be copied in explicitly.
    if (!MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) {
        zeroCopyAllowed = false;
        if (hostPtr) {
            if (!isHostPtrSVM) {
                copyMemoryFromHostPtr = true;
            }
        }
    } else if (allocationType == GraphicsAllocation::AllocationType::BUFFER) {
        allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
    }

    memory->setAllocationType(allocationType);
    memory->setMemObjectsAllocationWithWritableFlags(!(memoryProperties.flags.readOnly || memoryProperties.flags.hostReadOnly || memoryProperties.flags.hostNoAccess));

    pBuffer = createBufferHw(context,
                             memoryProperties,
                             flags,
                             flagsIntel,
                             size,
                             memory->getUnderlyingBuffer(),
                             (memoryProperties.flags.useHostPtr) ? hostPtr : nullptr,
                             memory,
                             zeroCopyAllowed,
                             isHostPtrSVM,
                             false);

    if (!pBuffer) {
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        memoryManager->removeAllocationFromHostPtrManager(memory);
        memoryManager->freeGraphicsMemory(memory);
        return nullptr;
    }

    printDebugString(DebugManager.flags.LogMemoryObject.get(), stdout,
                     "\nCreated Buffer: Handle %p, hostPtr %p, size %llu, memoryStorage %p, GPU address %#llx, memoryPool:%du\n",
                     pBuffer, hostPtr, size, memory->getUnderlyingBuffer(), memory->getGpuAddress(), memory->getMemoryPool());

    // Non-zero-copy USE_HOST_PTR buffers get a separate MAP_ALLOCATION so that
    // map/unmap can go through the original user pointer.
    if (memoryProperties.flags.useHostPtr) {
        if (!zeroCopyAllowed && !isHostPtrSVM) {
            AllocationProperties properties{rootDeviceIndex, false, size, GraphicsAllocation::AllocationType::MAP_ALLOCATION, false};
            properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
            mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
        }
    }

    Buffer::provideCompressionHint(allocationType, context, pBuffer);

    pBuffer->mapAllocation = mapAllocation;
    pBuffer->setHostPtrMinSize(size);

    if (copyMemoryFromHostPtr) {
        auto gmm = memory->getDefaultGmm();
        // Compressed or non-system-memory storage cannot be filled with a plain
        // CPU memcpy — route the initial copy through the GPU.
        bool gpuCopyRequired = (gmm && gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(memory->getMemoryPool());

        if (gpuCopyRequired) {
            auto blitMemoryToAllocationResult = context->blitMemoryToAllocation(*pBuffer, memory, hostPtr, size);

            if (blitMemoryToAllocationResult != BlitOperationResult::Success) {
                // Blitter unavailable/failed: fall back to a blocking write on
                // the context's special queue.
                auto cmdQ = context->getSpecialQueue();
                if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0, size, hostPtr, nullptr, 0, nullptr, nullptr)) {
                    errcodeRet = CL_OUT_OF_RESOURCES;
                }
            }
        } else {
            memcpy_s(memory->getUnderlyingBuffer(), size, hostPtr, size);
        }
    }

    if (errcodeRet != CL_SUCCESS) {
        pBuffer->release();
        return nullptr;
    }

    if (DebugManager.flags.MakeAllBuffersResident.get()) {
        auto graphicsAllocation = pBuffer->getGraphicsAllocation();
        context->getDevice(0u)->getRootDeviceEnvironment().memoryOperationsInterface->makeResident(ArrayRef<GraphicsAllocation *>(&graphicsAllocation, 1));
    }

    return pBuffer;
}
|
||||
|
||||
// Wraps an externally shared allocation (e.g. from an interop API) in a
// Buffer object and attaches its sharing handler. No host pointer, no
// zero-copy semantics, no redescription.
Buffer *Buffer::createSharedBuffer(Context *context, cl_mem_flags flags, SharingHandler *sharingHandler,
                                   GraphicsAllocation *graphicsAllocation) {
    auto memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0);
    auto sharedBuffer = createBufferHw(context,
                                       memoryProperties,
                                       flags,
                                       0,
                                       graphicsAllocation->getUnderlyingBufferSize(),
                                       nullptr,
                                       nullptr,
                                       graphicsAllocation,
                                       false,
                                       false,
                                       false);

    sharedBuffer->setSharingHandler(sharingHandler);
    return sharedBuffer;
}
|
||||
|
||||
// Validates the hostPtr/flags combination for buffer creation and derives two
// outputs: 'alignementSatisfied' (false when the user pointer or size is not
// cache-line aligned, or the pointer lies below the allocator's minimum
// address) and 'copyMemoryFromHostPtr' (true when the host data must be
// copied into a driver allocation). 'errcodeRet' is CL_SUCCESS or
// CL_INVALID_HOST_PTR.
void Buffer::checkMemory(MemoryPropertiesFlags memoryProperties,
                         size_t size,
                         void *hostPtr,
                         cl_int &errcodeRet,
                         bool &alignementSatisfied,
                         bool &copyMemoryFromHostPtr,
                         MemoryManager *memoryManager) {
    errcodeRet = CL_SUCCESS;
    alignementSatisfied = true;
    copyMemoryFromHostPtr = false;
    uintptr_t minAddress = 0;
    auto memRestrictions = memoryManager->getAlignedMallocRestrictions();
    if (memRestrictions) {
        minAddress = memRestrictions->minAddress;
    }

    // A host pointer is only legal together with USE_HOST_PTR or COPY_HOST_PTR.
    if (hostPtr) {
        if (!(memoryProperties.flags.useHostPtr || memoryProperties.flags.copyHostPtr)) {
            errcodeRet = CL_INVALID_HOST_PTR;
            return;
        }
    }

    if (memoryProperties.flags.useHostPtr) {
        if (hostPtr) {
            // Reject pointers that are already backed by a driver allocation.
            auto fragment = memoryManager->getHostPtrManager()->getFragment(hostPtr);
            if (fragment && fragment->driverAllocation) {
                errcodeRet = CL_INVALID_HOST_PTR;
                return;
            }
            // Misaligned pointer/size (or pointer below the allocator minimum)
            // rules out zero-copy and forces a copy into driver memory.
            if (alignUp(hostPtr, MemoryConstants::cacheLineSize) != hostPtr ||
                alignUp(size, MemoryConstants::cacheLineSize) != size ||
                minAddress > reinterpret_cast<uintptr_t>(hostPtr)) {
                alignementSatisfied = false;
                copyMemoryFromHostPtr = true;
            }
        } else {
            errcodeRet = CL_INVALID_HOST_PTR;
        }
    }

    if (memoryProperties.flags.copyHostPtr) {
        if (hostPtr) {
            copyMemoryFromHostPtr = true;
        } else {
            errcodeRet = CL_INVALID_HOST_PTR;
        }
    }
    return;
}
|
||||
|
||||
// Picks the allocation type for a new buffer: host memory for shared contexts,
// forced shared physical memory, or USE_HOST_PTR without local memory;
// compressed when the helper deems the properties suitable; plain BUFFER
// otherwise.
GraphicsAllocation::AllocationType Buffer::getGraphicsAllocationType(const MemoryPropertiesFlags &properties, Context &context,
                                                                     bool renderCompressedBuffers, bool isLocalMemoryEnabled,
                                                                     bool preferCompression) {
    const bool forcedHostMemory = context.isSharedContext || properties.flags.forceSharedPhysicalMemory;
    const bool hostPtrWithoutLocalMemory = properties.flags.useHostPtr && !isLocalMemoryEnabled;
    if (forcedHostMemory || hostPtrWithoutLocalMemory) {
        return GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
    }

    if (MemObjHelper::isSuitableForRenderCompression(renderCompressedBuffers, properties, context, preferCompression)) {
        return GraphicsAllocation::AllocationType::BUFFER_COMPRESSED;
    }

    return GraphicsAllocation::AllocationType::BUFFER;
}
|
||||
|
||||
bool Buffer::isReadOnlyMemoryPermittedByFlags(const MemoryPropertiesFlags &properties) {
|
||||
// Host won't access or will only read and kernel will only read
|
||||
return (properties.flags.hostNoAccess || properties.flags.hostReadOnly) && properties.flags.readOnly;
|
||||
}
|
||||
|
||||
// Creates a sub-buffer that aliases this buffer's storage at region->origin.
// The sub-buffer shares the parent's graphics allocation, zero-copy state and
// sharing handler; the parent's internal refcount is bumped to keep it alive.
Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
                                cl_mem_flags_intel flagsIntel,
                                const cl_buffer_region *region,
                                cl_int &errcodeRet) {
    DEBUG_BREAK_IF(nullptr == createFunction);
    MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0);
    // Reuse the parent's HW-specific factory; storage and host pointer are
    // offset into the parent's memory rather than newly allocated.
    auto buffer = createFunction(this->context, memoryProperties, flags, 0, region->size,
                                 ptrOffset(this->memoryStorage, region->origin),
                                 this->hostPtr ? ptrOffset(this->hostPtr, region->origin) : nullptr,
                                 this->graphicsAllocation,
                                 this->isZeroCopy, this->isHostPtrSVM, false);

    if (this->context->isProvidingPerformanceHints()) {
        this->context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, SUBBUFFER_SHARES_MEMORY, static_cast<cl_mem>(this));
    }

    buffer->associatedMemObject = this; // back-reference; makes isSubBuffer() true
    buffer->offset = region->origin;
    buffer->setParentSharingHandler(this->getSharingHandler());
    this->incRefInternal();

    errcodeRet = CL_SUCCESS;
    return buffer;
}
|
||||
|
||||
// Patches a stateless kernel argument at 'memory' with this buffer's GPU
// address (or 32-bit GPU address when set32BitAddressing) plus the sub-buffer
// offset. Returns the address that was patched in.
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing) {
    // Subbuffers have offset that graphicsAllocation is not aware of
    uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
    // Sanity check: a zero address is only acceptable for locked allocations,
    // allocations with a non-zero GPU base, or shared-handle imports without
    // a CPU address.
    DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) ||
                     (this->getCpuAddress() == nullptr && this->getGraphicsAllocation()->peekSharedHandle())));

    patchWithRequiredSize(memory, patchSize, addressToPatch);

    return addressToPatch;
}
|
||||
|
||||
// Validates (and where zero, derives) the row/slice pitches for a rectangular
// buffer read/write. Zero pitches are replaced in-place with the tightly
// packed defaults per the clEnqueue{Read,Write}BufferRect rules. Returns false
// when any pitch is inconsistent with 'region' or the addressed rectangle
// exceeds the buffer size.
bool Buffer::bufferRectPitchSet(const size_t *bufferOrigin,
                                const size_t *region,
                                size_t &bufferRowPitch,
                                size_t &bufferSlicePitch,
                                size_t &hostRowPitch,
                                size_t &hostSlicePitch) {
    // Defaults: row pitch = region width; slice pitch = height * row pitch.
    if (bufferRowPitch == 0)
        bufferRowPitch = region[0];
    if (bufferSlicePitch == 0)
        bufferSlicePitch = region[1] * bufferRowPitch;

    if (hostRowPitch == 0)
        hostRowPitch = region[0];
    if (hostSlicePitch == 0)
        hostSlicePitch = region[1] * hostRowPitch;

    if (bufferRowPitch < region[0] ||
        hostRowPitch < region[0]) {
        return false;
    }
    // Slice pitches must cover a whole slice and be multiples of the row pitch.
    if ((bufferSlicePitch < region[1] * bufferRowPitch || bufferSlicePitch % bufferRowPitch != 0) ||
        (hostSlicePitch < region[1] * hostRowPitch || hostSlicePitch % hostRowPitch != 0)) {
        return false;
    }

    // Byte address of the last accessed element must lie within the buffer.
    if ((bufferOrigin[2] + region[2] - 1) * bufferSlicePitch + (bufferOrigin[1] + region[1] - 1) * bufferRowPitch + bufferOrigin[0] + region[0] > this->getSize()) {
        return false;
    }
    return true;
}
|
||||
|
||||
void Buffer::transferData(void *dst, void *src, size_t copySize, size_t copyOffset) {
|
||||
DBG_LOG(LogMemoryObject, __FUNCTION__, " hostPtr: ", hostPtr, ", size: ", copySize, ", offset: ", copyOffset, ", memoryStorage: ", memoryStorage);
|
||||
auto dstPtr = ptrOffset(dst, copyOffset);
|
||||
auto srcPtr = ptrOffset(src, copyOffset);
|
||||
memcpy_s(dstPtr, copySize, srcPtr, copySize);
|
||||
}
|
||||
|
||||
// Buffers are 1-D: only the first size/offset component is meaningful.
// Copies device-side storage out to the user's host pointer.
void Buffer::transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) {
    transferData(hostPtr, memoryStorage, copySize[0], copyOffset[0]);
}
|
||||
|
||||
// Buffers are 1-D: only the first size/offset component is meaningful.
// Copies the user's host data into the device-side storage.
void Buffer::transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) {
    transferData(memoryStorage, hostPtr, copySize[0], copyOffset[0]);
}
|
||||
|
||||
// Number of host bytes touched by a rect operation: byte offset of the region
// start plus the byte extent of the region itself.
size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch) {
    const size_t startOffsetInBytes = origin[2] * slicePitch + origin[1] * rowPitch + origin[0];
    const size_t regionExtentInBytes = slicePitch * (region[2] - 1) + rowPitch * (region[1] - 1) + region[0];
    return startOffsetInBytes + regionExtentInBytes;
}
|
||||
|
||||
bool Buffer::isReadWriteOnCpuAllowed() {
|
||||
if (forceDisallowCPUCopy) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (this->isCompressed()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (graphicsAllocation->peekSharedHandle() != 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Heuristic: is a CPU transfer of 'size' bytes at 'ptr' expected to be faster
// than a GPU transfer? (The "Preffered" spelling is part of the existing API.)
bool Buffer::isReadWriteOnCpuPreffered(void *ptr, size_t size) {
    //if buffer is not zero copy and pointer is aligned it will be more beneficial to do the transfer on GPU
    if (!isMemObjZeroCopy() && (reinterpret_cast<uintptr_t>(ptr) & (MemoryConstants::cacheLineSize - 1)) == 0) {
        return false;
    }

    //on low power devices larger transfers are better on the GPU
    if (context->getDevice(0)->getDeviceInfo().platformLP && size > maxBufferSizeForReadWriteOnCpu) {
        return false;
    }

    //if we are not in System Memory Pool, it is more beneficial to do the transfer on GPU
    //for 32 bit applications, utilize CPU transfers here.
    if (!MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool()) && is64bit) {
        return false;
    }

    return true;
}
|
||||
|
||||
// Constructs the HW-specific Buffer subclass via the per-core-family factory
// table and records the factory on the object (used later for sub-buffer
// creation). Returns nullptr if the factory itself returned nullptr.
Buffer *Buffer::createBufferHw(Context *context,
                               MemoryPropertiesFlags memoryProperties,
                               cl_mem_flags flags,
                               cl_mem_flags_intel flagsIntel,
                               size_t size,
                               void *memoryStorage,
                               void *hostPtr,
                               GraphicsAllocation *gfxAllocation,
                               bool zeroCopy,
                               bool isHostPtrSVM,
                               bool isImageRedescribed) {
    const auto device = context->getDevice(0);
    const auto &hwInfo = device->getHardwareInfo();

    auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction;
    DEBUG_BREAK_IF(nullptr == funcCreate);
    auto pBuffer = funcCreate(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation,
                              zeroCopy, isHostPtrSVM, isImageRedescribed);
    DEBUG_BREAK_IF(nullptr == pBuffer);
    if (pBuffer) {
        pBuffer->createFunction = funcCreate; // reused by createSubBuffer()
    }
    return pBuffer;
}
|
||||
|
||||
// Constructs a context-less HW-specific Buffer directly from a device — used
// for internal/temporary buffers (see setSurfaceState). Unlike createBufferHw
// the factory result is dereferenced without a null check (only a
// DEBUG_BREAK_IF) — NOTE(review): confirm funcCreate cannot return nullptr here.
Buffer *Buffer::createBufferHwFromDevice(const ClDevice *device,
                                         cl_mem_flags flags,
                                         cl_mem_flags_intel flagsIntel,
                                         size_t size,
                                         void *memoryStorage,
                                         void *hostPtr,
                                         GraphicsAllocation *gfxAllocation,
                                         size_t offset,
                                         bool zeroCopy,
                                         bool isHostPtrSVM,
                                         bool isImageRedescribed) {

    const auto &hwInfo = device->getHardwareInfo();

    auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction;
    DEBUG_BREAK_IF(nullptr == funcCreate);
    MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0);
    auto pBuffer = funcCreate(nullptr, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation,
                              zeroCopy, isHostPtrSVM, isImageRedescribed);
    pBuffer->offset = offset;
    // No context is attached, so the execution environment comes straight from
    // the device (needed e.g. by getMocsValue()).
    pBuffer->executionEnvironment = device->getExecutionEnvironment();
    return pBuffer;
}
|
||||
|
||||
// Selects the MOCS (cacheability) index for this buffer's surface state.
// L3-cached MOCS is used when caching is not disabled, the object is not
// flagged uncacheable, and either the address/size are cache-line aligned,
// the object is read-only, or it is not zero-copy; otherwise the
// cacheline-misaligned (uncached) MOCS is returned.
uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const {
    uint64_t bufferAddress = 0;
    size_t bufferSize = 0;
    if (getGraphicsAllocation()) {
        bufferAddress = getGraphicsAllocation()->getGpuAddress();
        bufferSize = getGraphicsAllocation()->getUnderlyingBufferSize();
    } else {
        // No allocation (context-less buffers): fall back to the host pointer.
        bufferAddress = reinterpret_cast<uint64_t>(getHostPtr());
        bufferSize = getSize();
    }
    bufferAddress += this->offset; // sub-buffers alias the parent allocation

    bool readOnlyMemObj = isValueSet(getMemoryPropertiesFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
    bool alignedMemObj = isAligned<MemoryConstants::cacheLineSize>(bufferAddress) &&
                         isAligned<MemoryConstants::cacheLineSize>(bufferSize);

    auto gmmHelper = executionEnvironment->getGmmHelper();
    if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
        return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    } else {
        return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    }
}
|
||||
|
||||
bool Buffer::isCompressed() const {
|
||||
if (this->getGraphicsAllocation()->getDefaultGmm()) {
|
||||
return this->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed;
|
||||
}
|
||||
if (this->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Programs a RENDER_SURFACE_STATE for an arbitrary (e.g. SVM) allocation by
// building a throwaway Buffer around it, delegating to the HW-specific
// setArgStateful, then destroying the wrapper.
void Buffer::setSurfaceState(const ClDevice *device,
                             void *surfaceState,
                             size_t svmSize,
                             void *svmPtr,
                             size_t offset,
                             GraphicsAllocation *gfxAlloc,
                             cl_mem_flags flags,
                             cl_mem_flags_intel flagsIntel) {
    auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, gfxAlloc, offset, true, false, false);
    buffer->setArgStateful(surfaceState, false, false, false, false);
    // Detach the allocation before deleting so the wrapper's destruction does
    // not free the caller-owned gfxAlloc.
    buffer->graphicsAllocation = nullptr;
    delete buffer;
}
|
||||
|
||||
// Emits a neutral diagnostic hint stating whether the new buffer ended up
// compressed; only when hints are enabled and the HW supports compressed buffers.
void Buffer::provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
                                    Context *context,
                                    Buffer *buffer) {
    const bool hintsActive = context->isProvidingPerformanceHints() &&
                             HwHelper::renderCompressedBuffersSupported(context->getDevice(0)->getHardwareInfo());
    if (!hintsActive) {
        return;
    }
    if (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_COMPRESSED, buffer);
    } else {
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_NOT_COMPRESSED, buffer);
    }
}
|
||||
} // namespace NEO
|
||||
225
opencl/source/mem_obj/buffer.h
Normal file
225
opencl/source/mem_obj/buffer.h
Normal file
@@ -0,0 +1,225 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "core/helpers/basic_math.h"
|
||||
#include "core/memory_manager/memory_constants.h"
|
||||
|
||||
#include "context/context_type.h"
|
||||
#include "extensions/public/cl_ext_private.h"
|
||||
#include "igfxfmid.h"
|
||||
#include "mem_obj/mem_obj.h"
|
||||
#include "memory_properties_flags.h"
|
||||
|
||||
namespace NEO {
|
||||
class Buffer;
|
||||
class ClDevice;
|
||||
class MemoryManager;
|
||||
|
||||
// Factory signature for constructing the HW-specific Buffer subclass; one
// such function is registered per GFX core family in bufferFactory.
typedef Buffer *(*BufferCreatFunc)(Context *context,
                                   MemoryPropertiesFlags memoryProperties,
                                   cl_mem_flags flags,
                                   cl_mem_flags_intel flagsIntel,
                                   size_t size,
                                   void *memoryStorage,
                                   void *hostPtr,
                                   GraphicsAllocation *gfxAllocation,
                                   bool zeroCopy,
                                   bool isHostPtrSVM,
                                   bool isImageRedescribed);
|
||||
|
||||
// Per-core-family entry holding the Buffer factory callback.
typedef struct {
    BufferCreatFunc createBufferFunction;
} BufferFuncs;

// Factory table indexed by GFX core family; defined in buffer.cpp.
extern BufferFuncs bufferFactory[IGFX_MAX_CORE];
|
||||
|
||||
class Buffer : public MemObj {
|
||||
public:
|
||||
constexpr static size_t maxBufferSizeForReadWriteOnCpu = 10 * MB;
|
||||
constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
|
||||
constexpr static cl_ulong objectMagic = MemObj::objectMagic | 0x02;
|
||||
bool forceDisallowCPUCopy = false;
|
||||
|
||||
~Buffer() override;
|
||||
|
||||
static void validateInputAndCreateBuffer(cl_context &context,
|
||||
MemoryPropertiesFlags memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t size,
|
||||
void *hostPtr,
|
||||
cl_int &retVal,
|
||||
cl_mem &buffer);
|
||||
|
||||
static Buffer *create(Context *context,
|
||||
cl_mem_flags flags,
|
||||
size_t size,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
static Buffer *create(Context *context,
|
||||
MemoryPropertiesFlags properties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t size,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
static Buffer *createSharedBuffer(Context *context,
|
||||
cl_mem_flags flags,
|
||||
SharingHandler *sharingHandler,
|
||||
GraphicsAllocation *graphicsAllocation);
|
||||
|
||||
static Buffer *createBufferHw(Context *context,
|
||||
MemoryPropertiesFlags memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t size,
|
||||
void *memoryStorage,
|
||||
void *hostPtr,
|
||||
GraphicsAllocation *gfxAllocation,
|
||||
bool zeroCopy,
|
||||
bool isHostPtrSVM,
|
||||
bool isImageRedescribed);
|
||||
|
||||
static Buffer *createBufferHwFromDevice(const ClDevice *device,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t size,
|
||||
void *memoryStorage,
|
||||
void *hostPtr,
|
||||
GraphicsAllocation *gfxAllocation,
|
||||
size_t offset,
|
||||
bool zeroCopy,
|
||||
bool isHostPtrSVM,
|
||||
bool isImageRedescribed);
|
||||
|
||||
Buffer *createSubBuffer(cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
const cl_buffer_region *region,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
static void setSurfaceState(const ClDevice *device,
|
||||
void *surfaceState,
|
||||
size_t svmSize,
|
||||
void *svmPtr,
|
||||
size_t offset,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel);
|
||||
|
||||
static void provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
|
||||
Context *context,
|
||||
Buffer *buffer);
|
||||
|
||||
BufferCreatFunc createFunction = nullptr;
|
||||
bool isSubBuffer();
|
||||
bool isValidSubBufferOffset(size_t offset);
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); }
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing);
|
||||
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) = 0;
|
||||
bool bufferRectPitchSet(const size_t *bufferOrigin,
|
||||
const size_t *region,
|
||||
size_t &bufferRowPitch,
|
||||
size_t &bufferSlicePitch,
|
||||
size_t &hostRowPitch,
|
||||
size_t &hostSlicePitch);
|
||||
|
||||
static size_t calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch);
|
||||
|
||||
void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override;
|
||||
void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override;
|
||||
|
||||
bool isReadWriteOnCpuAllowed();
|
||||
bool isReadWriteOnCpuPreffered(void *ptr, size_t size);
|
||||
|
||||
uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const;
|
||||
|
||||
bool isCompressed() const;
|
||||
|
||||
protected:
|
||||
Buffer(Context *context,
|
||||
MemoryPropertiesFlags memoryProperties,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
size_t size,
|
||||
void *memoryStorage,
|
||||
void *hostPtr,
|
||||
GraphicsAllocation *gfxAllocation,
|
||||
bool zeroCopy,
|
||||
bool isHostPtrSVM,
|
||||
bool isObjectRedescribed);
|
||||
|
||||
Buffer();
|
||||
|
||||
static void checkMemory(MemoryPropertiesFlags memoryProperties,
|
||||
size_t size,
|
||||
void *hostPtr,
|
||||
cl_int &errcodeRet,
|
||||
bool &isZeroCopy,
|
||||
bool ©MemoryFromHostPtr,
|
||||
MemoryManager *memMngr);
|
||||
static GraphicsAllocation::AllocationType getGraphicsAllocationType(const MemoryPropertiesFlags &properties, Context &context,
|
||||
bool renderCompressedBuffers, bool localMemoryEnabled,
|
||||
bool preferCompression);
|
||||
static bool isReadOnlyMemoryPermittedByFlags(const MemoryPropertiesFlags &properties);
|
||||
|
||||
void transferData(void *dst, void *src, size_t copySize, size_t copyOffset);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
class BufferHw : public Buffer {
  public:
    // Forwards every argument to the Buffer base class; BufferHw adds no state of
    // its own beyond the surface type selected in create().
    BufferHw(Context *context,
             MemoryPropertiesFlags memoryProperties,
             cl_mem_flags flags,
             cl_mem_flags_intel flagsIntel,
             size_t size,
             void *memoryStorage,
             void *hostPtr,
             GraphicsAllocation *gfxAllocation,
             bool zeroCopy,
             bool isHostPtrSVM,
             bool isObjectRedescribed)
        : Buffer(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation,
                 zeroCopy, isHostPtrSVM, isObjectRedescribed) {}

    // Programs a RENDER_SURFACE_STATE for this buffer into |memory|
    // (implementation in buffer_base.inl).
    void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override;
    // Per-gfx-core hook invoked after the common surface-state programming
    // (no-op for BDW+, see buffer_bdw_plus.inl).
    void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument);
    // Branch-specific extension hook (see definitions/buffer_ext.inl).
    void appendSurfaceStateExt(void *memory);

    // Factory entry registered in the per-core buffer factory table
    // (see buffer_factory_init.inl); the new buffer's surfaceType is preset to 1D.
    static Buffer *create(Context *context,
                          MemoryPropertiesFlags memoryProperties,
                          cl_mem_flags flags,
                          cl_mem_flags_intel flagsIntel,
                          size_t size,
                          void *memoryStorage,
                          void *hostPtr,
                          GraphicsAllocation *gfxAllocation,
                          bool zeroCopy,
                          bool isHostPtrSVM,
                          bool isObjectRedescribed) {
        auto buffer = new BufferHw<GfxFamily>(context,
                                              memoryProperties,
                                              flags,
                                              flagsIntel,
                                              size,
                                              memoryStorage,
                                              hostPtr,
                                              gfxAllocation,
                                              zeroCopy,
                                              isHostPtrSVM,
                                              isObjectRedescribed);
        buffer->surfaceType = SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D;
        return buffer;
    }

    typedef typename GfxFamily::RENDER_SURFACE_STATE SURFACE_STATE;
    // Surface type programmed into the surface state; initialized by create().
    typename SURFACE_STATE::SURFACE_TYPE surfaceType;
};
|
||||
} // namespace NEO
|
||||
85
opencl/source/mem_obj/buffer_base.inl
Normal file
85
opencl/source/mem_obj/buffer_base.inl
Normal file
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "core/execution_environment/execution_environment.h"
|
||||
#include "core/gmm_helper/gmm.h"
|
||||
#include "core/gmm_helper/resource_info.h"
|
||||
#include "core/helpers/aligned_memory.h"
|
||||
#include "core/helpers/bit_helpers.h"
|
||||
#include "core/helpers/hw_cmds.h"
|
||||
|
||||
#include "buffer_ext.inl"
|
||||
#include "helpers/surface_formats.h"
|
||||
#include "mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Overlay that splits a linear buffer length into the Width/Height/Depth
// bitfields used by RENDER_SURFACE_STATE for buffer surfaces.
// NOTE(review): an identically named union is also defined in image.inl in the
// same namespace — including both .inl files in one translation unit would be a
// redefinition; confirm the build keeps them in separate TUs.
union SURFACE_STATE_BUFFER_LENGTH {
    uint32_t Length;
    struct SurfaceState {
        uint32_t Width : BITFIELD_RANGE(0, 6);
        uint32_t Height : BITFIELD_RANGE(7, 20);
        uint32_t Depth : BITFIELD_RANGE(21, 31);
    } SurfaceState;
};
|
||||
|
||||
// Programs a complete RENDER_SURFACE_STATE for this buffer at |memory| so it can
// be bound as a stateful kernel argument. Covers size encoding, surface type,
// RAW format, MOCS, base address, and (optionally) CCS auxiliary-surface mode.
template <typename GfxFamily>
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) {
    using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
    using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

    auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
    // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
    auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : castToUint64(getHostPtr());
    bufferAddress += this->offset;

    // Program a 4-byte-aligned base address; the residual sub-DWORD offset is
    // folded into the surface size below so the full range stays addressable.
    auto bufferAddressAligned = alignDown(bufferAddress, 4);
    auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned);

    // 512-byte padding when the surface may participate in AUX translation,
    // otherwise 4-byte granularity for RAW buffer surfaces.
    auto surfaceSize = alignUp(getSize() + bufferOffset, alignSizeForAuxTranslation ? 512 : 4);

    // Hardware encodes (surfaceSize - 1) split across the Width/Height/Depth fields.
    SURFACE_STATE_BUFFER_LENGTH Length = {0};
    Length.Length = static_cast<uint32_t>(surfaceSize - 1);

    surfaceState->setWidth(Length.SurfaceState.Width + 1);
    surfaceState->setHeight(Length.SurfaceState.Height + 1);
    surfaceState->setDepth(Length.SurfaceState.Depth + 1);

    // A zero address is programmed as a NULL surface instead of a buffer surface.
    if (bufferAddress != 0) {
        surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER);
    } else {
        surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
    }
    surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
    surfaceState->setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
    surfaceState->setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);

    surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
    surfaceState->setVerticalLineStride(0);
    surfaceState->setVerticalLineStrideOffset(0);

    // Cache policy depends on L3 usability and whether the argument is read-only.
    surfaceState->setMemoryObjectControlState(getMocsValue(disableL3, isReadOnlyArgument));
    surfaceState->setSurfaceBaseAddress(bufferAddressAligned);

    Gmm *gmm = graphicsAllocation ? graphicsAllocation->getDefaultGmm() : nullptr;

    if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
        GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == graphicsAllocation->getAllocationType()) {
        // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
        surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
        surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);
    } else {
        surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);
        surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
    }

    // Per-core and branch-specific extension hooks run last so they may override
    // any of the common programming above.
    appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument);
    appendSurfaceStateExt(memory);
}
|
||||
|
||||
} // namespace NEO
|
||||
16
opencl/source/mem_obj/buffer_bdw_plus.inl
Normal file
16
opencl/source/mem_obj/buffer_bdw_plus.inl
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/buffer_base.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Intentionally empty for BDW+ families: the common programming performed in
// setArgStateful is sufficient; specific cores may specialize this hook to
// append additional buffer surface state.
template <typename GfxFamily>
void BufferHw<GfxFamily>::appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnly) {
}
|
||||
|
||||
} // namespace NEO
|
||||
12
opencl/source/mem_obj/buffer_factory_init.inl
Normal file
12
opencl/source/mem_obj/buffer_factory_init.inl
Normal file
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
// Registers this gfx family's buffer create entry point in the global
// per-core factory table. Family and gfxCore are expected to be defined by the
// translation unit that includes this .inl.
template <>
void populateFactoryTable<BufferHw<Family>>() {
    extern BufferFuncs bufferFactory[IGFX_MAX_CORE];
    bufferFactory[gfxCore].createBufferFunction = BufferHw<Family>::create;
}
|
||||
16
opencl/source/mem_obj/definitions/buffer_ext.inl
Normal file
16
opencl/source/mem_obj/definitions/buffer_ext.inl
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Default (empty) branch extension point; the CMake BRANCH_DIR_SUFFIX mechanism
// can substitute a definitions directory providing a non-trivial version that
// appends extra surface state.
template <typename GfxFamily>
void BufferHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
|
||||
|
||||
} // namespace NEO
|
||||
16
opencl/source/mem_obj/definitions/image_ext.inl
Normal file
16
opencl/source/mem_obj/definitions/image_ext.inl
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/image.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Default (empty) branch extension point for image surface state; the CMake
// BRANCH_DIR_SUFFIX mechanism can substitute a definitions directory providing
// a non-trivial version.
template <typename GfxFamily>
void ImageHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
|
||||
|
||||
} // namespace NEO
|
||||
1402
opencl/source/mem_obj/image.cpp
Normal file
1402
opencl/source/mem_obj/image.cpp
Normal file
File diff suppressed because it is too large
Load Diff
355
opencl/source/mem_obj/image.h
Normal file
355
opencl/source/mem_obj/image.h
Normal file
@@ -0,0 +1,355 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "core/helpers/string.h"
|
||||
#include "core/image/image_surface_state.h"
|
||||
|
||||
#include "helpers/surface_formats.h"
|
||||
#include "helpers/validators.h"
|
||||
#include "mem_obj/buffer.h"
|
||||
#include "mem_obj/mem_obj.h"
|
||||
|
||||
namespace NEO {
|
||||
class Image;
|
||||
struct KernelInfo;
|
||||
struct SurfaceFormatInfo;
|
||||
|
||||
typedef Image *(*ImageCreatFunc)(Context *context,
|
||||
const MemoryPropertiesFlags &memoryProperties,
|
||||
uint64_t flags,
|
||||
uint64_t flagsIntel,
|
||||
size_t size,
|
||||
void *hostPtr,
|
||||
const cl_image_format &imageFormat,
|
||||
const cl_image_desc &imageDesc,
|
||||
bool zeroCopy,
|
||||
GraphicsAllocation *graphicsAllocation,
|
||||
bool isImageRedescribed,
|
||||
uint32_t baseMipLevel,
|
||||
uint32_t mipCount,
|
||||
const ClSurfaceFormatInfo *surfaceFormatInfo,
|
||||
const SurfaceOffsets *surfaceOffsets);
|
||||
|
||||
typedef struct {
|
||||
ImageCreatFunc createImageFunction;
|
||||
} ImageFuncs;
|
||||
|
||||
// OpenCL image memory object. Owns format/descriptor/layout metadata and
// exposes validation, creation, and per-gfx-family surface-state hooks
// (implemented by ImageHw<GfxFamily>).
class Image : public MemObj {
  public:
    // Magic constants used by castToObject<> to validate that a cl_mem handle
    // really refers to an Image.
    const static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
    static const cl_ulong objectMagic = MemObj::objectMagic | 0x01;

    ~Image() override;

    // Allocates and initializes an image; on failure reports through errcodeRet.
    static Image *create(Context *context,
                         const MemoryPropertiesFlags &memoryProperties,
                         cl_mem_flags flags,
                         cl_mem_flags_intel flagsIntel,
                         const ClSurfaceFormatInfo *surfaceFormat,
                         const cl_image_desc *imageDesc,
                         const void *hostPtr,
                         cl_int &errcodeRet);

    // Validates format/descriptor/host pointer first, then creates the image.
    static Image *validateAndCreateImage(Context *context,
                                         const MemoryPropertiesFlags &memoryProperties,
                                         cl_mem_flags flags,
                                         cl_mem_flags_intel flagsIntel,
                                         const cl_image_format *imageFormat,
                                         const cl_image_desc *imageDesc,
                                         const void *hostPtr,
                                         cl_int &errcodeRet);

    // Creates the hardware-specific image object for the current gfx family
    // (presumably via the ImageFuncs factory table — confirm against image.cpp).
    static Image *createImageHw(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags,
                                cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr,
                                const cl_image_format &imageFormat, const cl_image_desc &imageDesc,
                                bool zeroCopy, GraphicsAllocation *graphicsAllocation,
                                bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo = nullptr);

    // Wraps an allocation owned by a SharingHandler (external sharing path).
    static Image *createSharedImage(Context *context, SharingHandler *sharingHandler, const McsSurfaceInfo &mcsSurfaceInfo,
                                    GraphicsAllocation *graphicsAllocation, GraphicsAllocation *mcsAllocation,
                                    cl_mem_flags flags, const ClSurfaceFormatInfo *surfaceFormat, ImageInfo &imgInfo, uint32_t cubeFaceIndex, uint32_t baseMipLevel, uint32_t mipCount);

    // Parameter validation helpers; return CL error codes.
    static cl_int validate(Context *context,
                           const MemoryPropertiesFlags &memoryProperties,
                           const ClSurfaceFormatInfo *surfaceFormat,
                           const cl_image_desc *imageDesc,
                           const void *hostPtr);
    static cl_int validateImageFormat(const cl_image_format *imageFormat);

    static int32_t validatePlanarYUV(Context *context,
                                     const MemoryPropertiesFlags &memoryProperties,
                                     const cl_image_desc *imageDesc,
                                     const void *hostPtr);

    static int32_t validatePackedYUV(const MemoryPropertiesFlags &memoryProperties, const cl_image_desc *imageDesc);

    static cl_int validateImageTraits(Context *context, const MemoryPropertiesFlags &memoryProperties, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr);

    // Size in bytes a host pointer must cover for the given region and pitches.
    static size_t calculateHostPtrSize(const size_t *region, size_t rowPitch, size_t slicePitch, size_t pixelSize, uint32_t imageType);

    // Byte offset into a host pointer for the given origin/pitches/image type.
    static void calculateHostPtrOffset(size_t *imageOffset, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, uint32_t imageType, size_t bytesPerPixel);

    // Computes the row/slice pitches an image of this description would use.
    static cl_int getImageParams(Context *context,
                                 cl_mem_flags flags,
                                 const ClSurfaceFormatInfo *surfaceFormat,
                                 const cl_image_desc *imageDesc,
                                 size_t *imageRowPitch,
                                 size_t *imageSlicePitch);

    // Image-type classification helpers.
    static bool isImage1d(const cl_image_desc &imageDesc);

    static bool isImage2d(cl_mem_object_type imageType);

    static bool isImage2dOr2dArray(cl_mem_object_type imageType);

    static bool isDepthFormat(const cl_image_format &imageFormat);

    // True for image types whose storage has multiple slices (3D and arrays).
    static bool hasSlices(cl_mem_object_type type) {
        return (type == CL_MEM_OBJECT_IMAGE3D) || (type == CL_MEM_OBJECT_IMAGE1D_ARRAY) || (type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
    }

    // Conversions between OpenCL image types/descriptors and NEO-internal ones.
    static ImageType convertType(const cl_mem_object_type type);
    static cl_mem_object_type convertType(const ImageType type);
    static ImageDescriptor convertDescriptor(const cl_image_desc &imageDesc);
    static cl_image_desc convertDescriptor(const ImageDescriptor &imageDesc);

    // clGetImageInfo backing implementation.
    cl_int getImageInfo(cl_image_info paramName,
                        size_t paramValueSize,
                        void *paramValue,
                        size_t *paramValueSizeRet);

    // Surface-state programming hooks implemented per gfx family in ImageHw<>.
    virtual void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel) = 0;
    virtual void setMediaImageArg(void *memory) = 0;
    virtual void setMediaSurfaceRotation(void *memory) = 0;
    virtual void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) = 0;

    const cl_image_desc &getImageDesc() const;
    const cl_image_format &getImageFormat() const;
    const ClSurfaceFormatInfo &getSurfaceFormatInfo() const;

    // Copies between the user-visible host pointer and the image storage.
    void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override;
    void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override;

    static bool isFormatRedescribable(cl_image_format format);
    // Creates a view of this image with a different (redescribed) format.
    Image *redescribe();
    Image *redescribeFillImage();
    ImageCreatFunc createFunction;

    // Simple accessors for layout and sharing-related state.
    uint32_t getQPitch() { return qPitch; }
    void setQPitch(uint32_t qPitch) { this->qPitch = qPitch; }
    size_t getHostPtrRowPitch() const { return hostPtrRowPitch; }
    void setHostPtrRowPitch(size_t pitch) { this->hostPtrRowPitch = pitch; }
    size_t getHostPtrSlicePitch() const { return hostPtrSlicePitch; }
    void setHostPtrSlicePitch(size_t pitch) { this->hostPtrSlicePitch = pitch; }
    size_t getImageCount() const { return imageCount; }
    void setImageCount(size_t imageCount) { this->imageCount = imageCount; }
    void setImageRowPitch(size_t rowPitch) { imageDesc.image_row_pitch = rowPitch; }
    void setImageSlicePitch(size_t slicePitch) { imageDesc.image_slice_pitch = slicePitch; }
    void setSurfaceOffsets(uint64_t offset, uint32_t xOffset, uint32_t yOffset, uint32_t yOffsetForUVPlane) {
        surfaceOffsets.offset = offset;
        surfaceOffsets.xOffset = xOffset;
        surfaceOffsets.yOffset = yOffset;
        surfaceOffsets.yOffsetForUVplane = yOffsetForUVPlane;
    }
    void getSurfaceOffsets(SurfaceOffsets &surfaceOffsetsOut) { surfaceOffsetsOut = this->surfaceOffsets; }

    void setCubeFaceIndex(uint32_t index) { cubeFaceIndex = index; }
    uint32_t getCubeFaceIndex() { return cubeFaceIndex; }
    void setMediaPlaneType(cl_uint type) { mediaPlaneType = type; }
    cl_uint getMediaPlaneType() const { return mediaPlaneType; }
    int peekBaseMipLevel() { return baseMipLevel; }
    void setBaseMipLevel(int level) { this->baseMipLevel = level; }

    uint32_t peekMipCount() { return mipCount; }
    void setMipCount(uint32_t mipCountNew) { this->mipCount = mipCountNew; }

    // Looks up the internal surface format matching a CL format for the given
    // flags and supported CL version.
    static const ClSurfaceFormatInfo *getSurfaceFormatFromTable(cl_mem_flags flags, const cl_image_format *imageFormat, unsigned int clVersionSupport);
    static cl_int validateRegionAndOrigin(const size_t *origin, const size_t *region, const cl_image_desc &imgDesc);

    // Writes both planes of an NV12 image from a host pointer.
    cl_int writeNV12Planes(const void *hostPtr, size_t hostPtrRowPitch);
    void setMcsSurfaceInfo(const McsSurfaceInfo &info) { mcsSurfaceInfo = info; }
    const McsSurfaceInfo &getMcsSurfaceInfo() { return mcsSurfaceInfo; }
    size_t calculateOffsetForMapping(const MemObjOffsetArray &origin) const override;

    // 2D-array <-> 3D surface-state transformations (per gfx family).
    virtual void transformImage2dArrayTo3d(void *memory) = 0;
    virtual void transformImage3dTo2dArray(void *memory) = 0;

    bool hasSameDescriptor(const cl_image_desc &imageDesc) const;
    bool hasValidParentImageFormat(const cl_image_format &imageFormat) const;

    // True when this image was created on top of another mem object of the
    // corresponding kind.
    bool isImageFromBuffer() const { return castToObject<Buffer>(static_cast<cl_mem>(associatedMemObject)) ? true : false; }
    bool isImageFromImage() const { return castToObject<Image>(static_cast<cl_mem>(associatedMemObject)) ? true : false; }

  protected:
    Image(Context *context,
          const MemoryPropertiesFlags &memoryProperties,
          cl_mem_flags flags,
          cl_mem_flags_intel flagsIntel,
          size_t size,
          void *hostPtr,
          cl_image_format imageFormat,
          const cl_image_desc &imageDesc,
          bool zeroCopy,
          GraphicsAllocation *graphicsAllocation,
          bool isObjectRedescribed,
          uint32_t baseMipLevel,
          uint32_t mipCount,
          const ClSurfaceFormatInfo &surfaceFormatInfo,
          const SurfaceOffsets *surfaceOffsets = nullptr);

    void getOsSpecificImageInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);

    // Row/slice-granular copy used by the host-ptr transfer overrides above.
    void transferData(void *dst, size_t dstRowPitch, size_t dstSlicePitch,
                      void *src, size_t srcRowPitch, size_t srcSlicePitch,
                      std::array<size_t, 3> copyRegion, std::array<size_t, 3> copyOrigin);

    cl_image_format imageFormat;
    cl_image_desc imageDesc;
    ClSurfaceFormatInfo surfaceFormatInfo;
    McsSurfaceInfo mcsSurfaceInfo = {};
    // Surface QPitch forwarded into surface-state programming (see image.inl).
    uint32_t qPitch = 0;
    size_t hostPtrRowPitch = 0;
    size_t hostPtrSlicePitch = 0;
    size_t imageCount = 0;
    // NOTE(review): cubeFaceIndex and mediaPlaneType have no in-class
    // initializer — presumably always set by the creation paths; confirm.
    uint32_t cubeFaceIndex;
    cl_uint mediaPlaneType;
    SurfaceOffsets surfaceOffsets = {0};
    uint32_t baseMipLevel = 0;
    uint32_t mipCount = 1;

    // Per-channel-layout format validation helpers.
    static bool isValidSingleChannelFormat(const cl_image_format *imageFormat);
    static bool isValidIntensityFormat(const cl_image_format *imageFormat);
    static bool isValidLuminanceFormat(const cl_image_format *imageFormat);
    static bool isValidDepthFormat(const cl_image_format *imageFormat);
    static bool isValidDoubleChannelFormat(const cl_image_format *imageFormat);
    static bool isValidTripleChannelFormat(const cl_image_format *imageFormat);
    static bool isValidRGBAFormat(const cl_image_format *imageFormat);
    static bool isValidSRGBFormat(const cl_image_format *imageFormat);
    static bool isValidARGBFormat(const cl_image_format *imageFormat);
    static bool isValidDepthStencilFormat(const cl_image_format *imageFormat);
    static bool isValidYUVFormat(const cl_image_format *imageFormat);
    static bool hasAlphaChannel(const cl_image_format *imageFormat);
};
|
||||
|
||||
// Gfx-family specific image implementation: owns the RENDER_SURFACE_STATE
// programming for image kernel arguments.
template <typename GfxFamily>
class ImageHw : public Image {
    using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

  public:
    // Normalizes the descriptor (depth forced to 0 for all non-3D image types)
    // and selects the hardware surface type matching the CL image type.
    ImageHw(Context *context,
            const MemoryPropertiesFlags &memoryProperties,
            cl_mem_flags flags,
            cl_mem_flags_intel flagsIntel,
            size_t size,
            void *hostPtr,
            const cl_image_format &imageFormat,
            const cl_image_desc &imageDesc,
            bool zeroCopy,
            GraphicsAllocation *graphicsAllocation,
            bool isObjectRedescribed,
            uint32_t baseMipLevel,
            uint32_t mipCount,
            const ClSurfaceFormatInfo &surfaceFormatInfo,
            const SurfaceOffsets *surfaceOffsets = nullptr)
        : Image(context, memoryProperties, flags, flagsIntel, size, hostPtr, imageFormat, imageDesc,
                zeroCopy, graphicsAllocation, isObjectRedescribed, baseMipLevel, mipCount, surfaceFormatInfo, surfaceOffsets) {
        if (getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D ||
            getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||
            getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D ||
            getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
            getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
            this->imageDesc.image_depth = 0;
        }

        switch (imageDesc.image_type) {
        case CL_MEM_OBJECT_IMAGE1D:
        case CL_MEM_OBJECT_IMAGE1D_BUFFER:
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
            surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D;
            break;
        default: // unknown types fall back to the 2D surface type
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
        case CL_MEM_OBJECT_IMAGE2D:
            surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D;
            break;
        case CL_MEM_OBJECT_IMAGE3D:
            surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D;
            break;
        }
    }

    // Surface-state programming entry points (implemented in image.inl and the
    // per-core/branch .inl files).
    void setImageArg(void *memory, bool setAsMediaBlockImage, uint32_t mipLevel) override;
    void setAuxParamsForMultisamples(RENDER_SURFACE_STATE *surfaceState);
    MOCKABLE_VIRTUAL void setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm);
    void setMediaImageArg(void *memory) override;
    void setMediaSurfaceRotation(void *memory) override;
    void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override;
    void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState);
    void appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState);
    void appendSurfaceStateExt(void *memory);
    void transformImage2dArrayTo3d(void *memory) override;
    void transformImage3dTo2dArray(void *memory) override;

    // Factory matching the ImageCreatFunc signature; a valid surface format is
    // required (UNRECOVERABLE_IF guards against nullptr).
    static Image *create(Context *context,
                         const MemoryPropertiesFlags &memoryProperties,
                         cl_mem_flags flags,
                         cl_mem_flags_intel flagsIntel,
                         size_t size,
                         void *hostPtr,
                         const cl_image_format &imageFormat,
                         const cl_image_desc &imageDesc,
                         bool zeroCopy,
                         GraphicsAllocation *graphicsAllocation,
                         bool isObjectRedescribed,
                         uint32_t baseMipLevel,
                         uint32_t mipCount,
                         const ClSurfaceFormatInfo *surfaceFormatInfo,
                         const SurfaceOffsets *surfaceOffsets) {
        UNRECOVERABLE_IF(surfaceFormatInfo == nullptr);
        return new ImageHw<GfxFamily>(context,
                                      memoryProperties,
                                      flags,
                                      flagsIntel,
                                      size,
                                      hostPtr,
                                      imageFormat,
                                      imageDesc,
                                      zeroCopy,
                                      graphicsAllocation,
                                      isObjectRedescribed,
                                      baseMipLevel,
                                      mipCount,
                                      *surfaceFormatInfo,
                                      surfaceOffsets);
    }

    // Remaps a shader channel select so channels that are absent from the CL
    // channel order read as zero; all other channels pass through unchanged.
    static int getShaderChannelValue(int inputShaderChannel, cl_channel_order imageChannelOrder) {
        if (imageChannelOrder == CL_A) {
            // Alpha-only image: RGB read as zero.
            if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED ||
                inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN ||
                inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) {
                return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO;
            }
        } else if (imageChannelOrder == CL_R ||
                   imageChannelOrder == CL_RA ||
                   imageChannelOrder == CL_Rx) {
            // Red-only orders: green and blue read as zero.
            if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN ||
                inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) {
                return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO;
            }
        } else if (imageChannelOrder == CL_RG ||
                   imageChannelOrder == CL_RGx) {
            // Red/green orders: blue reads as zero.
            if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) {
                return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO;
            }
        }
        return inputShaderChannel;
    }
    // Hardware surface type selected by the constructor from the CL image type.
    typename RENDER_SURFACE_STATE::SURFACE_TYPE surfaceType;
};
|
||||
} // namespace NEO
|
||||
197
opencl/source/mem_obj/image.inl
Normal file
197
opencl/source/mem_obj/image.inl
Normal file
@@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "core/execution_environment/execution_environment.h"
|
||||
#include "core/gmm_helper/gmm.h"
|
||||
#include "core/gmm_helper/gmm_helper.h"
|
||||
#include "core/gmm_helper/resource_info.h"
|
||||
#include "core/helpers/aligned_memory.h"
|
||||
#include "core/helpers/hw_cmds.h"
|
||||
|
||||
#include "helpers/surface_formats.h"
|
||||
#include "image_ext.inl"
|
||||
#include "mem_obj/image.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Overlay that splits a linear buffer length into the Width/Height/Depth
// bitfields of RENDER_SURFACE_STATE; used for the image1d_buffer path below.
// NOTE(review): duplicates the identically named union in buffer_base.inl in
// the same namespace — including both .inl files into one translation unit
// would be a redefinition; confirm the build keeps them in separate TUs.
union SURFACE_STATE_BUFFER_LENGTH {
    uint32_t Length;
    struct SurfaceState {
        uint32_t Width : BITFIELD_RANGE(0, 6);
        uint32_t Height : BITFIELD_RANGE(7, 20);
        uint32_t Depth : BITFIELD_RANGE(21, 31);
    } SurfaceState;
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
void ImageHw<GfxFamily>::setImageArg(void *memory, bool setAsMediaBlockImage, uint32_t mipLevel) {
|
||||
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
|
||||
|
||||
auto gmm = getGraphicsAllocation()->getDefaultGmm();
|
||||
auto gmmHelper = executionEnvironment->getGmmHelper();
|
||||
|
||||
auto imageDescriptor = Image::convertDescriptor(getImageDesc());
|
||||
ImageInfo imgInfo;
|
||||
imgInfo.imgDesc = imageDescriptor;
|
||||
imgInfo.qPitch = qPitch;
|
||||
imgInfo.surfaceFormat = &getSurfaceFormatInfo().surfaceFormat;
|
||||
|
||||
setImageSurfaceState<GfxFamily>(surfaceState, imgInfo, getGraphicsAllocation()->getDefaultGmm(), *gmmHelper, cubeFaceIndex, getGraphicsAllocation()->getGpuAddress(), surfaceOffsets, IsNV12Image(&this->getImageFormat()));
|
||||
|
||||
if (getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
|
||||
// image1d_buffer is image1d created from buffer. The length of buffer could be larger
|
||||
// than the maximal image width. Mock image1d_buffer with SURFACE_TYPE_SURFTYPE_BUFFER.
|
||||
SURFACE_STATE_BUFFER_LENGTH Length = {0};
|
||||
Length.Length = static_cast<uint32_t>(getImageDesc().image_width - 1);
|
||||
|
||||
surfaceState->setWidth(static_cast<uint32_t>(Length.SurfaceState.Width + 1));
|
||||
surfaceState->setHeight(static_cast<uint32_t>(Length.SurfaceState.Height + 1));
|
||||
surfaceState->setDepth(static_cast<uint32_t>(Length.SurfaceState.Depth + 1));
|
||||
surfaceState->setSurfacePitch(static_cast<uint32_t>(getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes));
|
||||
surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER);
|
||||
} else {
|
||||
setImageSurfaceStateDimensions<GfxFamily>(surfaceState, imgInfo, cubeFaceIndex, surfaceType);
|
||||
if (setAsMediaBlockImage) {
|
||||
uint32_t elSize = static_cast<uint32_t>(getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes);
|
||||
surfaceState->setWidth(static_cast<uint32_t>((getImageDesc().image_width * elSize) / sizeof(uint32_t)));
|
||||
}
|
||||
}
|
||||
|
||||
surfaceState->setSurfaceMinLod(this->baseMipLevel + mipLevel);
|
||||
surfaceState->setMipCountLod((this->mipCount > 0) ? (this->mipCount - 1) : 0);
|
||||
setMipTailStartLod<GfxFamily>(surfaceState, gmm);
|
||||
|
||||
cl_channel_order imgChannelOrder = getSurfaceFormatInfo().OCLImageFormat.image_channel_order;
|
||||
int shaderChannelValue = ImageHw<GfxFamily>::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, imgChannelOrder);
|
||||
surfaceState->setShaderChannelSelectRed(static_cast<typename RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT>(shaderChannelValue));
|
||||
|
||||
if (imgChannelOrder == CL_LUMINANCE) {
|
||||
surfaceState->setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED);
|
||||
surfaceState->setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED);
|
||||
} else {
|
||||
shaderChannelValue = ImageHw<GfxFamily>::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, imgChannelOrder);
|
||||
surfaceState->setShaderChannelSelectGreen(static_cast<typename RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT>(shaderChannelValue));
|
||||
shaderChannelValue = ImageHw<GfxFamily>::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, imgChannelOrder);
|
||||
surfaceState->setShaderChannelSelectBlue(static_cast<typename RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT>(shaderChannelValue));
|
||||
}
|
||||
|
||||
surfaceState->setNumberOfMultisamples((typename RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES)mcsSurfaceInfo.multisampleCount);
|
||||
|
||||
if (imageDesc.num_samples > 1) {
|
||||
setAuxParamsForMultisamples(surfaceState);
|
||||
} else if (gmm && gmm->isRenderCompressed) {
|
||||
setAuxParamsForCCS<GfxFamily>(surfaceState, gmm);
|
||||
}
|
||||
appendSurfaceStateDepthParams(surfaceState);
|
||||
appendSurfaceStateParams(surfaceState);
|
||||
appendSurfaceStateExt(surfaceState);
|
||||
}
|
||||
|
||||
// Programs the auxiliary-surface fields of a RENDER_SURFACE_STATE for a
// multisampled image. Prefers the unified MCS+CCS path when the Gmm supports
// it, then plain CCS, and finally the legacy explicit MCS programming.
template <typename GfxFamily>
void ImageHw<GfxFamily>::setAuxParamsForMultisamples(RENDER_SURFACE_STATE *surfaceState) {
    using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;

    if (getMcsAllocation()) {
        auto mcsGmm = getMcsAllocation()->getDefaultGmm();

        if (mcsGmm->unifiedAuxTranslationCapable() && mcsGmm->hasMultisampleControlSurface()) {
            // Unified MCS+CCS: platform-specific hook selects the aux mode,
            // then pitch/qpitch, clear color and aux base come from the Gmm.
            setAuxParamsForMCSCCS(surfaceState, mcsGmm);
            surfaceState->setAuxiliarySurfacePitch(mcsGmm->getUnifiedAuxPitchTiles());
            surfaceState->setAuxiliarySurfaceQpitch(mcsGmm->getAuxQPitch());
            setClearColorParams<GfxFamily>(surfaceState, mcsGmm);
            setUnifiedAuxBaseAddress<GfxFamily>(surfaceState, mcsGmm);
        } else if (mcsGmm->unifiedAuxTranslationCapable()) {
            setAuxParamsForCCS<GfxFamily>(surfaceState, mcsGmm);
        } else {
            // Legacy MCS: aux mode 1, geometry from mcsSurfaceInfo, base from
            // the dedicated MCS allocation.
            surfaceState->setAuxiliarySurfaceMode((typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE)1);
            surfaceState->setAuxiliarySurfacePitch(mcsSurfaceInfo.pitch);
            surfaceState->setAuxiliarySurfaceQpitch(mcsSurfaceInfo.qPitch);
            surfaceState->setAuxiliarySurfaceBaseAddress(mcsAllocation->getGpuAddress());
        }
    } else if (isDepthFormat(imageFormat) && surfaceState->getSurfaceFormat() != SURFACE_FORMAT::SURFACE_FORMAT_R32_FLOAT_X8X24_TYPELESS) {
        // No MCS allocation: multisampled depth surfaces (other than the
        // R32_FLOAT_X8X24 typeless case) use depth/stencil storage format.
        surfaceState->setMultisampledSurfaceStorageFormat(RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL);
    }
}
|
||||
|
||||
// Default no-op customization point; presumably specialized per gfx family to
// append extra RENDER_SURFACE_STATE programming — no specialization is visible
// in this file.
template <typename GfxFamily>
void ImageHw<GfxFamily>::appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState) {
}
|
||||
|
||||
// Default no-op; specialized for TGLLP+ (see image_tgllp_plus.inl) to set the
// depth/stencil resource bit from the Gmm resource flags.
template <typename GfxFamily>
inline void ImageHw<GfxFamily>::appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState) {
}
|
||||
|
||||
// Programs a MEDIA_SURFACE_STATE at `memory` for this image (media/VME sampler
// path). The luma plane is always described as Y8; for NV12 the interleaved
// chroma plane is described via the UV-plane Y offset.
template <typename GfxFamily>
void ImageHw<GfxFamily>::setMediaImageArg(void *memory) {
    using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE;
    using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT;
    SURFACE_FORMAT surfaceFormat = MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y8_UNORM_VA;

    auto gmmHelper = executionEnvironment->getGmmHelper();
    auto surfaceState = reinterpret_cast<MEDIA_SURFACE_STATE *>(memory);
    // Start from the family's canonical init template, then fill in fields.
    *surfaceState = GfxFamily::cmdInitMediaSurfaceState;

    setMediaSurfaceRotation(reinterpret_cast<void *>(surfaceState));

    DEBUG_BREAK_IF(surfaceFormat == MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y1_UNORM);
    surfaceState->setWidth(static_cast<uint32_t>(getImageDesc().image_width));

    surfaceState->setHeight(static_cast<uint32_t>(getImageDesc().image_height));
    surfaceState->setPictureStructure(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE);

    // Tiling comes from the Gmm resource describing the backing allocation.
    auto gmm = getGraphicsAllocation()->getDefaultGmm();
    auto tileMode = static_cast<typename MEDIA_SURFACE_STATE::TILE_MODE>(gmm->gmmResourceInfo->getTileModeSurfaceState());

    surfaceState->setTileMode(tileMode);
    surfaceState->setSurfacePitch(static_cast<uint32_t>(getImageDesc().image_row_pitch));

    surfaceState->setSurfaceFormat(surfaceFormat);

    // Chroma fields default to "no separate chroma"; overridden below for NV12.
    surfaceState->setHalfPitchForChroma(false);
    surfaceState->setInterleaveChroma(false);
    surfaceState->setXOffsetForUCb(0);
    surfaceState->setYOffsetForUCb(0);
    surfaceState->setXOffsetForVCr(0);
    surfaceState->setYOffsetForVCr(0);

    setSurfaceMemoryObjectControlStateIndexToMocsTable(
        reinterpret_cast<void *>(surfaceState),
        gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE));

    if (IsNV12Image(&this->getImageFormat())) {
        // NV12: interleaved UV plane starts yOffsetForUVplane rows below Y.
        surfaceState->setInterleaveChroma(true);
        surfaceState->setYOffsetForUCb(this->surfaceOffsets.yOffsetForUVplane);
    }

    surfaceState->setVerticalLineStride(0);
    surfaceState->setVerticalLineStrideOffset(0);

    surfaceState->setSurfaceBaseAddress(getGraphicsAllocation()->getGpuAddress() + this->surfaceOffsets.offset);
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void ImageHw<GfxFamily>::transformImage2dArrayTo3d(void *memory) {
|
||||
DEBUG_BREAK_IF(imageDesc.image_type != CL_MEM_OBJECT_IMAGE3D);
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
|
||||
surfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D);
|
||||
surfaceState->setSurfaceArray(false);
|
||||
}
|
||||
template <typename GfxFamily>
|
||||
void ImageHw<GfxFamily>::transformImage3dTo2dArray(void *memory) {
|
||||
DEBUG_BREAK_IF(imageDesc.image_type != CL_MEM_OBJECT_IMAGE3D);
|
||||
using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
|
||||
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
|
||||
surfaceState->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D);
|
||||
surfaceState->setSurfaceArray(true);
|
||||
}
|
||||
|
||||
// Default no-op; specialized for TGLLP+ (image_tgllp_plus.inl) to select the
// AUX_MCS_LCE auxiliary surface mode.
template <typename GfxFamily>
void ImageHw<GfxFamily>::setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) {
}
|
||||
} // namespace NEO
|
||||
13
opencl/source/mem_obj/image_factory_init.inl
Normal file
13
opencl/source/mem_obj/image_factory_init.inl
Normal file
@@ -0,0 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
// Explicitly instantiates the image implementation for this family and wires
// its create() into the per-core image factory table. Presumably included from
// a per-family translation unit that defines `Family` and `gfxCore` — confirm
// against the enable_* files.
template class ImageHw<Family>;
template <>
void populateFactoryTable<ImageHw<Family>>() {
    extern ImageFuncs imageFactory[IGFX_MAX_CORE];
    imageFactory[gfxCore].createImageFunction = ImageHw<Family>::create;
}
|
||||
20
opencl/source/mem_obj/image_tgllp_plus.inl
Normal file
20
opencl/source/mem_obj/image_tgllp_plus.inl
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
// TGLLP+ specialization: multisampled surfaces with a unified MCS+CCS use the
// combined AUX_MCS_LCE auxiliary mode.
template <>
void ImageHw<Family>::setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) {
    surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE);
}
|
||||
|
||||
template <>
|
||||
void ImageHw<Family>::appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState) {
|
||||
const auto gmm = this->graphicsAllocation->getDefaultGmm();
|
||||
if (gmm) {
|
||||
const bool isDepthResource = gmm->gmmResourceInfo->getResourceFlags()->Gpu.Depth;
|
||||
surfaceState->setDepthStencilResource(isDepthResource);
|
||||
}
|
||||
}
|
||||
74
opencl/source/mem_obj/map_operations_handler.cpp
Normal file
74
opencl/source/mem_obj/map_operations_handler.cpp
Normal file
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/map_operations_handler.h"
|
||||
|
||||
#include "core/helpers/ptr_math.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
// Returns the number of currently registered mappings (thread-safe snapshot).
size_t MapOperationsHandler::size() const {
    std::lock_guard<std::mutex> guard(mtx);
    return mappedPointers.size();
}
|
||||
|
||||
bool MapOperationsHandler::add(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
MapInfo mapInfo(ptr, ptrLength, size, offset, mipLevel);
|
||||
mapInfo.readOnly = (mapFlags == CL_MAP_READ);
|
||||
|
||||
if (isOverlapping(mapInfo)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
mappedPointers.push_back(mapInfo);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Checks the candidate mapping against every registered mapping. Read-only
// candidates never conflict. Caller must hold mtx.
bool MapOperationsHandler::isOverlapping(MapInfo &inputMapInfo) {
    if (inputMapInfo.readOnly) {
        return false;
    }
    auto requestedBegin = inputMapInfo.ptr;
    auto requestedEnd = ptrOffset(requestedBegin, inputMapInfo.ptrLength);

    for (auto &existing : mappedPointers) {
        auto existingBegin = existing.ptr;
        auto existingEnd = ptrOffset(existingBegin, existing.ptrLength);

        // NOTE(review): '>=' also rejects ranges that merely touch
        // (requestedEnd == existingBegin) — confirm this conservatism is intended.
        if (requestedBegin < existingEnd && requestedEnd >= existingBegin) {
            return true;
        }
    }
    return false;
}
|
||||
|
||||
bool MapOperationsHandler::find(void *mappedPtr, MapInfo &outMapInfo) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
for (auto &mapInfo : mappedPointers) {
|
||||
if (mapInfo.ptr == mappedPtr) {
|
||||
outMapInfo = mapInfo;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void MapOperationsHandler::remove(void *mappedPtr) {
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
|
||||
auto endIter = mappedPointers.end();
|
||||
for (auto it = mappedPointers.begin(); it != endIter; it++) {
|
||||
if (it->ptr == mappedPtr) {
|
||||
std::iter_swap(it, mappedPointers.end() - 1);
|
||||
mappedPointers.pop_back();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
31
opencl/source/mem_obj/map_operations_handler.h
Normal file
31
opencl/source/mem_obj/map_operations_handler.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "helpers/properties_helper.h"
|
||||
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Thread-safe registry of active map() regions on a single mem object.
// Each entry records the mapped pointer plus its size/offset/mip level;
// overlapping non-read-only mappings are rejected at add() time.
class MapOperationsHandler {
  public:
    virtual ~MapOperationsHandler() = default;

    // Returns false when the region would overlap an existing writable mapping.
    bool add(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel);
    // Removes the mapping registered under mappedPtr; no-op when absent.
    void remove(void *mappedPtr);
    // Copies the entry for mappedPtr into outMapInfo; returns true on success.
    bool find(void *mappedPtr, MapInfo &outMapInfo);
    size_t size() const;

  protected:
    bool isOverlapping(MapInfo &inputMapInfo);
    std::vector<MapInfo> mappedPointers;
    // Guards mappedPointers; mutable so size() can remain const.
    mutable std::mutex mtx;
};
|
||||
|
||||
} // namespace NEO
|
||||
363
opencl/source/mem_obj/mem_obj.cpp
Normal file
363
opencl/source/mem_obj/mem_obj.cpp
Normal file
@@ -0,0 +1,363 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/mem_obj.h"
|
||||
|
||||
#include "core/command_stream/command_stream_receiver.h"
|
||||
#include "core/gmm_helper/gmm.h"
|
||||
#include "core/gmm_helper/resource_info.h"
|
||||
#include "core/helpers/aligned_memory.h"
|
||||
#include "core/helpers/bit_helpers.h"
|
||||
#include "core/helpers/get_info.h"
|
||||
#include "core/memory_manager/deferred_deleter.h"
|
||||
#include "core/memory_manager/internal_allocation_storage.h"
|
||||
#include "core/memory_manager/memory_manager.h"
|
||||
#include "core/os_interface/os_context.h"
|
||||
|
||||
#include "command_queue/command_queue.h"
|
||||
#include "context/context.h"
|
||||
#include "device/cl_device.h"
|
||||
#include "helpers/get_info_status_mapper.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Base constructor for all cl_mem object kinds (buffer, image, pipe).
// The destructor destroys gfxAllocation unless the object is redescribed,
// SVM-backed, a sub-object, or still reuse-referenced — so ownership of
// gfxAllocation effectively transfers here. memoryStorage/hostPtr are
// borrowed pointers.
MemObj::MemObj(Context *context,
               cl_mem_object_type memObjectType,
               const MemoryPropertiesFlags &memoryProperties,
               cl_mem_flags flags,
               cl_mem_flags_intel flagsIntel,
               size_t size,
               void *memoryStorage,
               void *hostPtr,
               GraphicsAllocation *gfxAllocation,
               bool zeroCopy,
               bool isHostPtrSVM,
               bool isObjectRedescribed)
    : context(context), memObjectType(memObjectType), memoryProperties(memoryProperties), flags(flags), flagsIntel(flagsIntel), size(size),
      memoryStorage(memoryStorage), hostPtr(hostPtr),
      isZeroCopy(zeroCopy), isHostPtrSVM(isHostPtrSVM), isObjectRedescribed(isObjectRedescribed),
      graphicsAllocation(gfxAllocation) {

    if (context) {
        // Keep the context alive for this object's lifetime and cache the
        // memory manager / execution environment used during destruction.
        context->incRefInternal();
        memoryManager = context->getMemoryManager();
        executionEnvironment = context->getDevice(0)->getExecutionEnvironment();
    }
}
|
||||
|
||||
// Tears down the mem object. The ordering here is deliberate: decide whether
// GPU completion must be awaited, release owned allocations, fire user
// destructor callbacks, and only then drop the context reference.
MemObj::~MemObj() {
    // Waiting is required when CPU-side resources are still referenced:
    // a heap-allocated map pointer, live mappings without CPU storage,
    // or registered destructor callbacks.
    bool needWait = false;
    if (allocatedMapPtr != nullptr) {
        needWait = true;
    }
    if (mapOperationsHandler.size() > 0 && !getCpuAddressForMapping()) {
        needWait = true;
    }
    if (!destructorCallbacks.empty()) {
        needWait = true;
    }

    if (memoryManager && !isObjectRedescribed) {
        if (peekSharingHandler()) {
            peekSharingHandler()->releaseReusedGraphicsAllocation();
        }
        // The primary allocation is owned here only when this is not a
        // sub-object, not SVM-backed, and no sharing reuse remains.
        if (graphicsAllocation && !associatedMemObject && !isHostPtrSVM && graphicsAllocation->peekReuseCount() == 0) {
            memoryManager->removeAllocationFromHostPtrManager(graphicsAllocation);
            bool doAsyncDestructions = DebugManager.flags.EnableAsyncDestroyAllocations.get();
            if (!doAsyncDestructions) {
                needWait = true;
            }
            if (needWait && graphicsAllocation->isUsed()) {
                memoryManager->waitForEnginesCompletion(*graphicsAllocation);
            }
            destroyGraphicsAllocation(graphicsAllocation, doAsyncDestructions);
            graphicsAllocation = nullptr;
        }

        if (!associatedMemObject) {
            releaseMapAllocation();
            releaseAllocatedMapPtr();
        }
        if (mcsAllocation) {
            destroyGraphicsAllocation(mcsAllocation, false);
        }

        // Sub-objects destroy their allocation only when it differs from the
        // parent's, then release the parent reference taken at creation.
        if (associatedMemObject) {
            if (associatedMemObject->getGraphicsAllocation() != this->getGraphicsAllocation()) {
                destroyGraphicsAllocation(graphicsAllocation, false);
            }
            associatedMemObject->decRefInternal();
        }
    }
    // User callbacks run in reverse registration order (LIFO).
    if (!destructorCallbacks.empty()) {
        for (auto iter = destructorCallbacks.rbegin(); iter != destructorCallbacks.rend(); iter++) {
            (*iter)->invoke(this);
            delete *iter;
        }
    }

    if (context) {
        context->decRefInternal();
    }
}
|
||||
|
||||
// Forwards the destruction notification to the user-registered function.
void MemObj::DestructorCallback::invoke(cl_mem memObj) {
    funcNotify(memObj, userData);
}
|
||||
|
||||
// Implements clGetMemObjectInfo: selects a source value and size for the
// requested query, then lets getInfo() copy it into the caller's buffer and
// translate the status into a CL result code. Unknown queries are delegated
// to the OS-specific handler.
cl_int MemObj::getMemObjectInfo(cl_mem_info paramName,
                                size_t paramValueSize,
                                void *paramValue,
                                size_t *paramValueSizeRet) {
    cl_int retVal;
    size_t srcParamSize = 0;
    void *srcParam = nullptr;
    // Locals for queries whose values must be materialized before copying.
    cl_bool usesSVMPointer;
    cl_uint refCnt = 0;
    cl_uint mapCount = 0;
    cl_mem clAssociatedMemObject = static_cast<cl_mem>(this->associatedMemObject);
    cl_context ctx = nullptr;
    uint64_t internalHandle = 0llu;

    switch (paramName) {
    case CL_MEM_TYPE:
        srcParamSize = sizeof(memObjectType);
        srcParam = &memObjectType;
        break;

    case CL_MEM_FLAGS:
        srcParamSize = sizeof(flags);
        srcParam = &flags;
        break;

    case CL_MEM_SIZE:
        srcParamSize = sizeof(size);
        srcParam = &size;
        break;

    case CL_MEM_HOST_PTR:
        srcParamSize = sizeof(hostPtr);
        srcParam = &hostPtr;
        break;

    case CL_MEM_CONTEXT:
        srcParamSize = sizeof(context);
        ctx = context;
        srcParam = &ctx;
        break;

    case CL_MEM_USES_SVM_POINTER:
        // True only for USE_HOST_PTR objects whose host pointer is SVM memory.
        usesSVMPointer = isHostPtrSVM && isValueSet(flags, CL_MEM_USE_HOST_PTR);
        srcParamSize = sizeof(cl_bool);
        srcParam = &usesSVMPointer;
        break;

    case CL_MEM_OFFSET:
        srcParamSize = sizeof(offset);
        srcParam = &offset;
        break;

    case CL_MEM_ASSOCIATED_MEMOBJECT:
        srcParamSize = sizeof(clAssociatedMemObject);
        srcParam = &clAssociatedMemObject;
        break;

    case CL_MEM_MAP_COUNT:
        srcParamSize = sizeof(mapCount);
        mapCount = static_cast<cl_uint>(mapOperationsHandler.size());
        srcParam = &mapCount;
        break;

    case CL_MEM_REFERENCE_COUNT:
        refCnt = static_cast<cl_uint>(this->getReference());
        srcParamSize = sizeof(refCnt);
        srcParam = &refCnt;
        break;
    case CL_MEM_ALLOCATION_HANDLE_INTEL:
        internalHandle = this->getGraphicsAllocation()->peekInternalHandle(this->memoryManager);
        srcParamSize = sizeof(internalHandle);
        srcParam = &internalHandle;
        break;

    default:
        getOsSpecificMemObjectInfo(paramName, &srcParamSize, &srcParam);
        break;
    }

    retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, srcParam, srcParamSize));

    if (paramValueSizeRet) {
        *paramValueSizeRet = srcParamSize;
    }

    return retVal;
}
|
||||
|
||||
// Registers a user callback invoked (LIFO) when this object is destroyed.
// The callback record is heap-owned by destructorCallbacks and freed in the
// destructor after invocation.
cl_int MemObj::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                                     void *userData) {
    auto callback = new DestructorCallback(funcNotify, userData);

    std::unique_lock<std::mutex> guard(mtx);
    destructorCallbacks.push_back(callback);
    return CL_SUCCESS;
}
|
||||
|
||||
// Returns the driver-side CPU storage backing this object.
void *MemObj::getCpuAddress() const {
    return memoryStorage;
}
|
||||
|
||||
// Returns the user-provided host pointer (may be nullptr).
void *MemObj::getHostPtr() const {
    return hostPtr;
}
|
||||
|
||||
// Returns the object's size in bytes as requested at creation.
size_t MemObj::getSize() const {
    return size;
}
|
||||
|
||||
// Stores the heap-allocated staging pointer used for map operations;
// released via releaseAllocatedMapPtr().
void MemObj::setAllocatedMapPtr(void *allocatedMapPtr) {
    this->allocatedMapPtr = allocatedMapPtr;
}
|
||||
|
||||
// True when CPU and GPU share the same backing storage (no staging copy).
bool MemObj::isMemObjZeroCopy() const {
    return isZeroCopy;
}
|
||||
|
||||
// True when the host pointer given at creation is SVM memory.
bool MemObj::isMemObjWithHostPtrSVM() const {
    return isHostPtrSVM;
}
|
||||
|
||||
// True when the object was created with CL_MEM_LOCALLY_UNCACHED_RESOURCE.
bool MemObj::isMemObjUncacheable() const {
    return isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
}
|
||||
|
||||
// True when either Intel uncached flag is set; used when programming the
// surface-state cacheability.
bool MemObj::isMemObjUncacheableForSurfaceState() const {
    return isAnyBitSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE);
}
|
||||
|
||||
// Returns the primary GPU allocation backing this object (may be nullptr
// after destruction paths run).
GraphicsAllocation *MemObj::getGraphicsAllocation() const {
    return graphicsAllocation;
}
|
||||
|
||||
// Swaps in a new primary allocation, destroying the old one unless a sharing
// handler still holds reuse references to it.
void MemObj::resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation) {
    // Serialize against concurrent users of this mem object.
    TakeOwnershipWrapper<MemObj> lock(*this);

    if (graphicsAllocation != nullptr && (peekSharingHandler() == nullptr || graphicsAllocation->peekReuseCount() == 0)) {
        memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
    }

    graphicsAllocation = newGraphicsAllocation;
}
|
||||
|
||||
// Host reads are disallowed for host-write-only or host-no-access objects.
bool MemObj::readMemObjFlagsInvalid() {
    return isValueSet(flags, CL_MEM_HOST_WRITE_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
}
|
||||
|
||||
// Host writes are disallowed for host-read-only or host-no-access objects.
bool MemObj::writeMemObjFlagsInvalid() {
    return isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
}
|
||||
|
||||
// A map request is invalid when it asks for an access direction that the
// object's host-access creation flags forbid.
bool MemObj::mapMemObjFlagsInvalid(cl_map_flags mapFlags) {
    if ((mapFlags & CL_MAP_WRITE) && writeMemObjFlagsInvalid()) {
        return true;
    }
    return (mapFlags & CL_MAP_READ) && readMemObjFlagsInvalid();
}
|
||||
|
||||
// Records the minimum host-pointer size required for this object.
void MemObj::setHostPtrMinSize(size_t size) {
    hostPtrMinSize = size;
}
|
||||
|
||||
void *MemObj::getCpuAddressForMapping() {
|
||||
void *ptrToReturn = nullptr;
|
||||
if (isValueSet(flags, CL_MEM_USE_HOST_PTR)) {
|
||||
ptrToReturn = this->hostPtr;
|
||||
} else {
|
||||
ptrToReturn = this->memoryStorage;
|
||||
}
|
||||
return ptrToReturn;
|
||||
}
|
||||
void *MemObj::getCpuAddressForMemoryTransfer() {
|
||||
void *ptrToReturn = nullptr;
|
||||
if (isValueSet(flags, CL_MEM_USE_HOST_PTR) && this->isMemObjZeroCopy()) {
|
||||
ptrToReturn = this->hostPtr;
|
||||
} else {
|
||||
ptrToReturn = this->memoryStorage;
|
||||
}
|
||||
return ptrToReturn;
|
||||
}
|
||||
void MemObj::releaseAllocatedMapPtr() {
|
||||
if (allocatedMapPtr) {
|
||||
DEBUG_BREAK_IF(isValueSet(flags, CL_MEM_USE_HOST_PTR));
|
||||
memoryManager->freeSystemMemory(allocatedMapPtr);
|
||||
}
|
||||
allocatedMapPtr = nullptr;
|
||||
}
|
||||
|
||||
void MemObj::releaseMapAllocation() {
|
||||
if (mapAllocation && !isHostPtrSVM) {
|
||||
destroyGraphicsAllocation(mapAllocation, false);
|
||||
}
|
||||
}
|
||||
|
||||
// Destroys a GPU allocation either deferred (after checking GPU usage) or
// immediately, depending on asyncDestroy.
void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy) {
    if (!asyncDestroy) {
        memoryManager->freeGraphicsMemory(allocation);
        return;
    }
    memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation);
}
|
||||
|
||||
// A host<->object copy can be skipped only when both sides resolve to the
// exact same address AND the command is a plain read/write (buffer, rect or
// image) — every other command always transfers.
bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) {
    auto objectStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offsetInMemObject);
    auto hostStorage = ptrOffset(hostPtr, offsetInHostPtr);

    bool samePointers = (objectStorage == hostStorage);
    bool plainReadWrite = (cmdType == CL_COMMAND_WRITE_BUFFER || cmdType == CL_COMMAND_READ_BUFFER ||
                           cmdType == CL_COMMAND_WRITE_BUFFER_RECT || cmdType == CL_COMMAND_READ_BUFFER_RECT ||
                           cmdType == CL_COMMAND_WRITE_IMAGE || cmdType == CL_COMMAND_READ_IMAGE);
    return !(samePointers && plainReadWrite);
}
|
||||
|
||||
// Returns the host base pointer used for CPU-visible map operations, lazily
// creating a MAP_ALLOCATION staging buffer when the object has no usable
// host pointer of its own.
void *MemObj::getBasePtrForMap(uint32_t rootDeviceIndex) {
    if (associatedMemObject) {
        // Sub-objects map through their parent's base pointer.
        return associatedMemObject->getBasePtrForMap(rootDeviceIndex);
    }
    if (getMemoryPropertiesFlags() & CL_MEM_USE_HOST_PTR) {
        return getHostPtr();
    } else {
        // Serialize lazy creation of the staging allocation.
        TakeOwnershipWrapper<MemObj> memObjOwnership(*this);
        if (getMapAllocation()) {
            return getMapAllocation()->getUnderlyingBuffer();
        } else {
            // NOTE(review): allocateSystemMemory may return nullptr; the
            // result is stored and wrapped without a null check — confirm
            // callers tolerate a null map pointer.
            auto memory = memoryManager->allocateSystemMemory(getSize(), MemoryConstants::pageSize);
            setAllocatedMapPtr(memory);
            AllocationProperties properties{rootDeviceIndex, false, getSize(), GraphicsAllocation::AllocationType::MAP_ALLOCATION, false};

            auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, memory);
            setMapAllocation(allocation);
            return getAllocatedMapPtr();
        }
    }
}
|
||||
|
||||
// Registers a mapped range with the map-operations handler; returns false
// when it would overlap an existing writable mapping.
bool MemObj::addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags,
                          MemObjSizeArray &size, MemObjOffsetArray &offset,
                          uint32_t mipLevel) {
    return mapOperationsHandler.add(ptr, ptrLength, mapFlags, size, offset,
                                    mipLevel);
}
|
||||
|
||||
bool MemObj::isTiledAllocation() const {
|
||||
auto gmm = graphicsAllocation->getDefaultGmm();
|
||||
return gmm && (gmm->gmmResourceInfo->getTileModeSurfaceState() != 0);
|
||||
}
|
||||
|
||||
// CPU (zero-copy) mapping is allowed only for linear, non-shared,
// non-mipmapped, non-render-compressed allocations residing in system
// memory, and only when not disabled via the debug flag.
bool MemObj::mappingOnCpuAllowed() const {
    return !isTiledAllocation() && !peekSharingHandler() && !isMipMapped(this) && !DebugManager.flags.DisableZeroCopyForBuffers.get() &&
           !(graphicsAllocation->getDefaultGmm() && graphicsAllocation->getDefaultGmm()->isRenderCompressed) &&
           MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool());
}
|
||||
} // namespace NEO
|
||||
171
opencl/source/mem_obj/mem_obj.h
Normal file
171
opencl/source/mem_obj/mem_obj.h
Normal file
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "core/debug_settings/debug_settings_manager.h"
|
||||
|
||||
#include "api/cl_types.h"
|
||||
#include "extensions/public/cl_ext_private.h"
|
||||
#include "helpers/base_object.h"
|
||||
#include "helpers/mipmap.h"
|
||||
#include "mem_obj/map_operations_handler.h"
|
||||
#include "memory_properties_flags.h"
|
||||
#include "sharings/sharing.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
namespace NEO {
|
||||
class ExecutionEnvironment;
|
||||
class GraphicsAllocation;
|
||||
struct KernelInfo;
|
||||
class MemoryManager;
|
||||
class Context;
|
||||
|
||||
// Maps the opaque OpenCL handle type _cl_mem onto its driver-side
// implementation class for BaseObject's handle<->object conversions.
template <>
struct OpenCLObjectMapper<_cl_mem> {
    typedef class MemObj DerivedType;
};
|
||||
|
||||
// Driver-side implementation of cl_mem: common base for buffers, images and
// pipes. Owns (conditionally) the backing GraphicsAllocation, tracks map
// operations, sharing handlers and user destructor callbacks.
class MemObj : public BaseObject<_cl_mem> {
  public:
    // Magic used by BaseObject to validate handles of this type.
    constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFF00LL;
    constexpr static cl_ulong objectMagic = 0xAB2212340CACDD00LL;

    MemObj(Context *context,
           cl_mem_object_type memObjectType,
           const MemoryPropertiesFlags &memoryProperties,
           cl_mem_flags flags,
           cl_mem_flags_intel flagsIntel,
           size_t size,
           void *memoryStorage,
           void *hostPtr,
           GraphicsAllocation *gfxAllocation,
           bool zeroCopy,
           bool isHostPtrSVM,
           bool isObjectRedescribed); // fixed parameter-name typo ("isObjectRedescrbied"); matches the definition
    ~MemObj() override;

    // clGetMemObjectInfo implementation.
    cl_int getMemObjectInfo(cl_mem_info paramName,
                            size_t paramValueSize,
                            void *paramValue,
                            size_t *paramValueSizeRet);
    // Registers a callback invoked (LIFO) at destruction.
    cl_int setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                                 void *userData);

    void *getCpuAddress() const;
    void *getHostPtr() const;
    bool getIsObjectRedescribed() const { return isObjectRedescribed; };
    size_t getSize() const;

    // Map-operation bookkeeping (delegates to mapOperationsHandler).
    bool addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel);
    bool findMappedPtr(void *mappedPtr, MapInfo &outMapInfo) { return mapOperationsHandler.find(mappedPtr, outMapInfo); }
    void removeMappedPtr(void *mappedPtr) { mapOperationsHandler.remove(mappedPtr); }
    // Host base pointer for map operations; may lazily create a staging allocation.
    void *getBasePtrForMap(uint32_t rootDeviceIndex);

    MOCKABLE_VIRTUAL void setAllocatedMapPtr(void *allocatedMapPtr);
    void *getAllocatedMapPtr() const { return allocatedMapPtr; }

    void setHostPtrMinSize(size_t size);
    void releaseAllocatedMapPtr();
    void releaseMapAllocation();

    bool isMemObjZeroCopy() const;
    bool isMemObjWithHostPtrSVM() const;
    bool isMemObjUncacheable() const;
    bool isMemObjUncacheableForSurfaceState() const;
    // Overridden by subclasses that support host-pointer transfers; the base
    // implementations are unreachable by design.
    virtual void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) { UNRECOVERABLE_IF(true); };
    virtual void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) { UNRECOVERABLE_IF(true); };

    GraphicsAllocation *getGraphicsAllocation() const;
    void resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation);
    GraphicsAllocation *getMcsAllocation() { return mcsAllocation; }
    void setMcsAllocation(GraphicsAllocation *alloc) { mcsAllocation = alloc; }

    bool readMemObjFlagsInvalid();
    bool writeMemObjFlagsInvalid();
    bool mapMemObjFlagsInvalid(cl_map_flags mapFlags);

    MOCKABLE_VIRTUAL bool isTiledAllocation() const;

    void *getCpuAddressForMapping();
    void *getCpuAddressForMemoryTransfer();

    std::shared_ptr<SharingHandler> &getSharingHandler() { return sharingHandler; }
    SharingHandler *peekSharingHandler() const { return sharingHandler.get(); }
    void setSharingHandler(SharingHandler *sharingHandler) { this->sharingHandler.reset(sharingHandler); }
    void setParentSharingHandler(std::shared_ptr<SharingHandler> &handler) { sharingHandler = handler; }
    unsigned int acquireCount = 0;
    Context *getContext() const { return context; }

    void destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy);
    bool checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *ptr, cl_command_type cmdType);
    bool mappingOnCpuAllowed() const;
    virtual size_t calculateOffsetForMapping(const MemObjOffsetArray &offset) const { return offset[0]; }
    size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); }
    cl_mem_object_type peekClMemObjType() const { return memObjectType; }
    size_t getOffset() const { return offset; }
    MemoryManager *getMemoryManager() const {
        return memoryManager;
    }
    void setMapAllocation(GraphicsAllocation *allocation) {
        mapAllocation = allocation;
    }
    // Sub-objects share the parent's map allocation.
    GraphicsAllocation *getMapAllocation() const {
        if (associatedMemObject) {
            return associatedMemObject->getMapAllocation();
        }
        return mapAllocation;
    }

    const cl_mem_flags &getMemoryPropertiesFlags() const { return flags; }
    const cl_mem_flags &getMemoryPropertiesFlagsIntel() const { return flagsIntel; }

  protected:
    void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);

    Context *context;
    cl_mem_object_type memObjectType;
    MemoryPropertiesFlags memoryProperties;
    cl_mem_flags flags = 0;
    cl_mem_flags_intel flagsIntel = 0;
    size_t size;
    size_t hostPtrMinSize = 0;
    void *memoryStorage; // driver-side CPU storage
    void *hostPtr;       // user-provided host pointer (may be nullptr)
    void *allocatedMapPtr = nullptr;
    MapOperationsHandler mapOperationsHandler;
    size_t offset = 0;                      // sub-buffer offset within the parent
    MemObj *associatedMemObject = nullptr;  // parent object for sub-objects
    cl_uint refCount = 0;
    ExecutionEnvironment *executionEnvironment = nullptr;
    bool isZeroCopy;
    bool isHostPtrSVM;
    bool isObjectRedescribed; // redescribed objects do not own the allocation
    MemoryManager *memoryManager = nullptr;
    GraphicsAllocation *graphicsAllocation;
    GraphicsAllocation *mcsAllocation = nullptr;
    GraphicsAllocation *mapAllocation = nullptr;
    std::shared_ptr<SharingHandler> sharingHandler;

    // Heap-owned record of a user destructor callback (see setDestructorCallback).
    class DestructorCallback {
      public:
        DestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                           void *userData)
            : funcNotify(funcNotify), userData(userData){};

        void invoke(cl_mem memObj);

      private:
        void(CL_CALLBACK *funcNotify)(cl_mem, void *);
        void *userData;
    };

    std::vector<DestructorCallback *> destructorCallbacks;
};
|
||||
} // namespace NEO
|
||||
25
opencl/source/mem_obj/mem_obj_helper.cpp
Normal file
25
opencl/source/mem_obj/mem_obj_helper.cpp
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/mem_obj_helper_common.inl"
|
||||
#include "memory_properties_flags.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Default-branch policy: compression is used whenever the allocation supports
// it and the caller prefers it; `properties` and `context` are unused here
// (presumably consulted in branch-specific overrides of this file).
bool MemObjHelper::isSuitableForRenderCompression(bool renderCompressed, const MemoryPropertiesFlags &properties, Context &context, bool preferCompression) {
    return renderCompressed && preferCompression;
}
|
||||
|
||||
// Default branch has no extra properties to validate, so everything passes.
bool MemObjHelper::validateExtraMemoryProperties(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel) {
    return true;
}
|
||||
|
||||
// Default branch accepts no additional cl_mem_flags bits beyond the common set.
const uint64_t MemObjHelper::extraFlags = 0;

// Likewise for Intel-specific cl_mem_flags_intel bits.
const uint64_t MemObjHelper::extraFlagsIntel = 0;
|
||||
|
||||
} // namespace NEO
|
||||
42
opencl/source/mem_obj/mem_obj_helper.h
Normal file
42
opencl/source/mem_obj/mem_obj_helper.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "core/helpers/bit_helpers.h"
|
||||
#include "core/memory_manager/memory_manager.h"
|
||||
#include "core/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "CL/cl.h"
|
||||
#include "extensions/public/cl_ext_private.h"
|
||||
#include "helpers/mem_properties_parser_helper.h"
|
||||
#include "mem_obj/mem_obj.h"
|
||||
#include "memory_properties_flags.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Static validation and property helpers shared by buffer and image creation
// paths. All members are static; the class is never instantiated.
class MemObjHelper {
  public:
    // Flag masks composing the sets of accepted creation flags. The "extra"
    // pair is defined per branch (see mem_obj_helper.cpp); the rest are
    // defined in the common .inl.
    static const uint64_t extraFlags;
    static const uint64_t extraFlagsIntel;
    static const uint64_t commonFlags;
    static const uint64_t commonFlagsIntel;
    static const uint64_t validFlagsForBuffer;
    static const uint64_t validFlagsForBufferIntel;
    static const uint64_t validFlagsForImage;
    static const uint64_t validFlagsForImageIntel;

    static bool validateMemoryPropertiesForBuffer(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel);
    static bool validateMemoryPropertiesForImage(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem parent);
    static AllocationProperties getAllocationPropertiesWithImageInfo(uint32_t rootDeviceIndex, ImageInfo &imgInfo, bool allocateMemory, const MemoryPropertiesFlags &memoryProperties);
    static bool checkMemFlagsForSubBuffer(cl_mem_flags flags);
    static SVMAllocsManager::SvmAllocationProperties getSvmAllocationProperties(cl_mem_flags flags);
    static bool isSuitableForRenderCompression(bool renderCompressed, const MemoryPropertiesFlags &properties, Context &context, bool preferCompression);

  protected:
    static bool validateExtraMemoryProperties(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel);
};
|
||||
} // namespace NEO
|
||||
107
opencl/source/mem_obj/mem_obj_helper_common.inl
Normal file
107
opencl/source/mem_obj/mem_obj_helper_common.inl
Normal file
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/mem_obj_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
bool MemObjHelper::validateMemoryPropertiesForBuffer(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel) {
|
||||
/* Check all the invalid flags combination. */
|
||||
if ((isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY)) ||
|
||||
(isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) ||
|
||||
(isValueSet(flags, CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) ||
|
||||
(isValueSet(flags, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR)) ||
|
||||
(isValueSet(flags, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) ||
|
||||
(isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) ||
|
||||
(isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY)) ||
|
||||
(isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return validateExtraMemoryProperties(memoryProperties, flags, flagsIntel);
|
||||
}
|
||||
|
||||
bool MemObjHelper::validateMemoryPropertiesForImage(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem parent) {
    /* Check all the invalid flags combination. */
    // Unless the caller opted out via CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL,
    // reject every pair of mutually exclusive access / host-pointer flags.
    if ((!isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) &&
        (isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY) ||
         isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY) ||
         isValueSet(flags, CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY) ||
         isValueSet(flags, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR) ||
         isValueSet(flags, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR) ||
         isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY) ||
         isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS) ||
         isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS) ||
         isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_WRITE) ||
         isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_WRITE_ONLY) ||
         isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_ONLY))) {
        return false;
    }

    // Images created from a parent mem object (non-null cl_mem) may not use
    // host-pointer flags and may not request wider access than the parent
    // grants, unless either side set the UNRESTRICTED access flag.
    auto parentMemObj = castToObject<MemObj>(parent);
    if (parentMemObj != nullptr && flags) {
        auto parentFlags = parentMemObj->getMemoryPropertiesFlags();
        /* Check whether flags are compatible with parent. */
        if (isValueSet(flags, CL_MEM_ALLOC_HOST_PTR) ||
            isValueSet(flags, CL_MEM_COPY_HOST_PTR) ||
            isValueSet(flags, CL_MEM_USE_HOST_PTR) ||
            ((!isValueSet(parentFlags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) &&
             (!isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) &&
             ((isValueSet(parentFlags, CL_MEM_WRITE_ONLY) && isValueSet(flags, CL_MEM_READ_WRITE)) ||
              (isValueSet(parentFlags, CL_MEM_WRITE_ONLY) && isValueSet(flags, CL_MEM_READ_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_READ_ONLY) && isValueSet(flags, CL_MEM_READ_WRITE)) ||
              (isValueSet(parentFlags, CL_MEM_READ_ONLY) && isValueSet(flags, CL_MEM_WRITE_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_READ_WRITE)) ||
              (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_WRITE_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_READ_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_HOST_NO_ACCESS) && isValueSet(flags, CL_MEM_HOST_WRITE_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_HOST_NO_ACCESS) && isValueSet(flags, CL_MEM_HOST_READ_ONLY))))) {
            return false;
        }
    }

    // Delegate any platform/branch specific validation.
    return validateExtraMemoryProperties(memoryProperties, flags, flagsIntel);
}
|
||||
|
||||
AllocationProperties MemObjHelper::getAllocationPropertiesWithImageInfo(uint32_t rootDeviceIndex, ImageInfo &imgInfo, bool allocateMemory, const MemoryPropertiesFlags &memoryProperties) {
    // Build IMAGE-type allocation properties for the given device, then let the
    // parser apply caching/placement policies derived from the memory properties.
    AllocationProperties properties{rootDeviceIndex, allocateMemory, imgInfo, GraphicsAllocation::AllocationType::IMAGE};
    MemoryPropertiesParser::fillPoliciesInProperties(properties, memoryProperties);
    return properties;
}
|
||||
|
||||
bool MemObjHelper::checkMemFlagsForSubBuffer(cl_mem_flags flags) {
    // Sub-buffers accept only access-qualifier flags; host-pointer and
    // allocation flags are inherited from the parent buffer.
    constexpr cl_mem_flags allValidFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
                                           CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
    return isFieldValid(flags, allValidFlags);
}
|
||||
|
||||
SVMAllocsManager::SvmAllocationProperties MemObjHelper::getSvmAllocationProperties(cl_mem_flags flags) {
    // Map cl_mem_flags onto the reduced property set used by the SVM allocator.
    SVMAllocsManager::SvmAllocationProperties properties;
    properties.readOnly = isValueSet(flags, CL_MEM_READ_ONLY);
    properties.coherent = isValueSet(flags, CL_MEM_SVM_FINE_GRAIN_BUFFER);
    // The host pointer is treated as read-only also when the host has no access at all.
    properties.hostPtrReadOnly = isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
    return properties;
}
|
||||
|
||||
// cl_mem_flags accepted for both buffers and images: access qualifiers,
// host-pointer flags and host-access qualifiers, plus branch-specific extras.
const uint64_t MemObjHelper::commonFlags = extraFlags | CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR |
                                           CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;

// Intel-extension flags accepted for both buffers and images.
const uint64_t MemObjHelper::commonFlagsIntel = extraFlagsIntel | CL_MEM_LOCALLY_UNCACHED_RESOURCE |
                                                CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_48BIT_RESOURCE_INTEL;

// Buffers additionally allow allocations above the standard maximum size.
const uint64_t MemObjHelper::validFlagsForBuffer = commonFlags | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL;

const uint64_t MemObjHelper::validFlagsForBufferIntel = commonFlagsIntel | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL;

// Images additionally allow the no-access / unrestricted-access qualifiers and
// forcing linear (non-tiled) storage.
const uint64_t MemObjHelper::validFlagsForImage = commonFlags | CL_MEM_NO_ACCESS_INTEL | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_FORCE_LINEAR_STORAGE_INTEL;

const uint64_t MemObjHelper::validFlagsForImageIntel = commonFlagsIntel;
|
||||
|
||||
} // namespace NEO
|
||||
123
opencl/source/mem_obj/pipe.cpp
Normal file
123
opencl/source/mem_obj/pipe.cpp
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mem_obj/pipe.h"
|
||||
|
||||
#include "core/helpers/get_info.h"
|
||||
#include "core/memory_manager/memory_manager.h"
|
||||
|
||||
#include "context/context.h"
|
||||
#include "device/cl_device.h"
|
||||
#include "helpers/get_info_status_mapper.h"
|
||||
#include "helpers/memory_properties_flags_helpers.h"
|
||||
#include "mem_obj/mem_obj_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// Constructs a pipe mem object over an already-allocated backing surface.
// The surface holds maxPackets + 1 packets (one slot is kept free to
// distinguish full from empty) preceded by the reserved header space.
Pipe::Pipe(Context *context,
           cl_mem_flags flags,
           cl_uint packetSize,
           cl_uint maxPackets,
           const cl_pipe_properties *properties,
           void *memoryStorage,
           GraphicsAllocation *gfxAllocation)
    : MemObj(context,
             CL_MEM_OBJECT_PIPE,
             MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0),
             flags,
             0,
             // Widen to size_t BEFORE multiplying: the previous cl_uint
             // arithmetic could overflow 32 bits for large pipes.
             static_cast<size_t>(packetSize) * (static_cast<size_t>(maxPackets) + 1) + intelPipeHeaderReservedSpace,
             memoryStorage,
             nullptr,
             gfxAllocation,
             false,
             false,
             false),
      pipePacketSize(packetSize),
      pipeMaxPackets(maxPackets) {
    magic = objectMagic;
}
|
||||
|
||||
// Allocates the backing surface and constructs a Pipe. Returns nullptr and
// sets errcodeRet to CL_OUT_OF_HOST_MEMORY on allocation failure; parameter
// validation is assumed to have happened at the API layer.
Pipe *Pipe::create(Context *context,
                   cl_mem_flags flags,
                   cl_uint packetSize,
                   cl_uint maxPackets,
                   const cl_pipe_properties *properties,
                   cl_int &errcodeRet) {
    Pipe *pPipe = nullptr;
    errcodeRet = CL_SUCCESS;

    MemoryManager *memoryManager = context->getMemoryManager();
    DEBUG_BREAK_IF(!memoryManager);

    MemoryPropertiesFlags memoryPropertiesFlags = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0);
    while (true) {
        // One extra packet slot plus the reserved header. Widen to size_t
        // BEFORE multiplying: cl_uint arithmetic could overflow 32 bits.
        auto size = static_cast<size_t>(packetSize) * (static_cast<size_t>(maxPackets) + 1) + intelPipeHeaderReservedSpace;
        auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex();
        AllocationProperties allocProperties =
            MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryPropertiesFlags, true, size, GraphicsAllocation::AllocationType::PIPE, false);
        GraphicsAllocation *memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
        if (!memory) {
            errcodeRet = CL_OUT_OF_HOST_MEMORY;
            break;
        }

        pPipe = new (std::nothrow) Pipe(context, flags, packetSize, maxPackets, properties, memory->getUnderlyingBuffer(), memory);
        if (!pPipe) {
            // Pipe construction failed, so the allocation has no owner yet; free it here.
            memoryManager->freeGraphicsMemory(memory);
            memory = nullptr;
            errcodeRet = CL_OUT_OF_HOST_MEMORY;
            break;
        }
        // Initialize pipe_control_intel_t structure located at the beginning of the surface
        memset(memory->getUnderlyingBuffer(), 0, intelPipeHeaderReservedSpace);
        *reinterpret_cast<unsigned int *>(memory->getUnderlyingBuffer()) = maxPackets + 1;
        break;
    }

    return pPipe;
}
|
||||
|
||||
// Backend for clGetPipeInfo: reports CL_PIPE_PACKET_SIZE / CL_PIPE_MAX_PACKETS.
// The query parameter is typed cl_pipe_info (the correct clGetPipeInfo type);
// this is binary-compatible with the previously used cl_image_info, since both
// alias cl_uint. Unknown queries fall through with a null source so getInfo()
// reports the error.
cl_int Pipe::getPipeInfo(cl_pipe_info paramName,
                         size_t paramValueSize,
                         void *paramValue,
                         size_t *paramValueSizeRet) {

    size_t srcParamSize = 0;
    void *srcParam = nullptr;

    switch (paramName) {
    case CL_PIPE_PACKET_SIZE:
        srcParamSize = sizeof(cl_uint);
        srcParam = &(pipePacketSize);
        break;

    case CL_PIPE_MAX_PACKETS:
        srcParamSize = sizeof(cl_uint);
        srcParam = &(pipeMaxPackets);
        break;

    default:
        // Leave srcParam null; getInfo() maps this to an error status.
        break;
    }

    cl_int retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, srcParam, srcParamSize));

    // Per the CL convention, the required size is reported even on error.
    if (paramValueSizeRet) {
        *paramValueSizeRet = srcParamSize;
    }

    return retVal;
}
|
||||
|
||||
void Pipe::setPipeArg(void *memory, uint32_t patchSize) {
    // Patch the kernel-argument slot with the GPU address of the pipe's allocation.
    const auto gpuAddressToPatch = static_cast<uintptr_t>(getGraphicsAllocation()->getGpuAddressToPatch());
    patchWithRequiredSize(memory, patchSize, gpuAddressToPatch);
}
|
||||
|
||||
// The backing allocation is released by MemObj's destructor; nothing pipe-specific to clean up.
Pipe::~Pipe() = default;
|
||||
} // namespace NEO
|
||||
46
opencl/source/mem_obj/pipe.h
Normal file
46
opencl/source/mem_obj/pipe.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
class Pipe : public MemObj {
|
||||
public:
|
||||
static const size_t intelPipeHeaderReservedSpace = 128;
|
||||
static const cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
|
||||
static const cl_ulong objectMagic = MemObj::objectMagic | 0x03;
|
||||
static Pipe *create(
|
||||
Context *context,
|
||||
cl_mem_flags flags,
|
||||
cl_uint packetSize,
|
||||
cl_uint maxPackets,
|
||||
const cl_pipe_properties *properties,
|
||||
cl_int &errcodeRet);
|
||||
|
||||
~Pipe() override;
|
||||
|
||||
cl_int getPipeInfo(cl_image_info paramName,
|
||||
size_t paramValueSize,
|
||||
void *paramValue,
|
||||
size_t *paramValueSizeRet);
|
||||
|
||||
void setPipeArg(void *memory, uint32_t patchSize);
|
||||
|
||||
protected:
|
||||
Pipe(Context *context,
|
||||
cl_mem_flags flags,
|
||||
cl_uint packetSize,
|
||||
cl_uint maxPackets,
|
||||
const cl_pipe_properties *properties,
|
||||
void *memoryStorage,
|
||||
GraphicsAllocation *gfxAllocation);
|
||||
|
||||
cl_uint pipePacketSize;
|
||||
cl_uint pipeMaxPackets;
|
||||
};
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user