Reorganize directory structure [1/n]

Change-Id: Id1a94577437a4826a32411869f516fec20314ec0
This commit is contained in:
kamdiedrich
2020-02-22 21:54:11 +01:00
parent 247cc953d1
commit fa8e720f9e
660 changed files with 3 additions and 3 deletions

View File

@@ -0,0 +1,34 @@
#
# Copyright (C) 2018-2020 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Source list for the mem_obj runtime component.
# ${BRANCH_DIR_SUFFIX} lets branch-specific trees substitute their own copies
# of selected files (mem_obj_helper.cpp, buffer_ext.inl, image_ext.inl).
set(RUNTIME_SRCS_MEM_OBJ
    ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer.h
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_base.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_bdw_plus.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/buffer_factory_init.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/image.h
    ${CMAKE_CURRENT_SOURCE_DIR}/image.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/image_tgllp_plus.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/image_factory_init.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/map_operations_handler.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj.h
    ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/mem_obj_helper.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper.h
    ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_helper_common.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/pipe.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/pipe.h
    ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/buffer_ext.inl
    ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/image_ext.inl
)
# Compile the listed files into the NEO static library target.
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_MEM_OBJ})
# Publish the list globally so other CMake scripts can reuse it.
set_property(GLOBAL PROPERTY RUNTIME_SRCS_MEM_OBJ ${RUNTIME_SRCS_MEM_OBJ})
add_subdirectories()

View File

@@ -0,0 +1,649 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/buffer.h"
#include "core/command_stream/command_stream_receiver.h"
#include "core/debug_settings/debug_settings_manager.h"
#include "core/execution_environment/root_device_environment.h"
#include "core/gmm_helper/gmm.h"
#include "core/gmm_helper/gmm_helper.h"
#include "core/helpers/aligned_memory.h"
#include "core/helpers/hw_helper.h"
#include "core/helpers/hw_info.h"
#include "core/helpers/ptr_math.h"
#include "core/helpers/string.h"
#include "core/helpers/timestamp_packet.h"
#include "core/memory_manager/host_ptr_manager.h"
#include "core/memory_manager/memory_manager.h"
#include "core/memory_manager/memory_operations_handler.h"
#include "core/memory_manager/unified_memory_manager.h"
#include "command_queue/command_queue.h"
#include "context/context.h"
#include "device/cl_device.h"
#include "helpers/memory_properties_flags_helpers.h"
#include "helpers/validators.h"
#include "mem_obj/mem_obj_helper.h"
namespace NEO {
// Per-GFX-core factory table; entries are filled in by
// populateFactoryTable<BufferHw<Family>>() for each supported core family.
BufferFuncs bufferFactory[IGFX_MAX_CORE] = {};
// Main constructor: forwards all state to MemObj, then stamps the buffer
// magic value and records the minimum host-pointer size for validation.
Buffer::Buffer(Context *context,
               MemoryPropertiesFlags memoryProperties,
               cl_mem_flags flags,
               cl_mem_flags_intel flagsIntel,
               size_t size,
               void *memoryStorage,
               void *hostPtr,
               GraphicsAllocation *gfxAllocation,
               bool zeroCopy,
               bool isHostPtrSVM,
               bool isObjectRedescribed)
    : MemObj(context,
             CL_MEM_OBJECT_BUFFER,
             memoryProperties,
             flags,
             flagsIntel,
             size,
             memoryStorage,
             hostPtr,
             gfxAllocation,
             zeroCopy,
             isHostPtrSVM,
             isObjectRedescribed) {
    magic = objectMagic; // identifies a live Buffer object in validation paths
    setHostPtrMinSize(size);
}
// Default constructor: builds an empty buffer with no context, storage or
// allocation. NOTE(review): presumably used by derived/mock types that fill
// state later — confirm against callers.
Buffer::Buffer() : MemObj(nullptr, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 0, nullptr, nullptr, nullptr, false, false, false) {
}
Buffer::~Buffer() = default;
// A sub-buffer is recognized by having a parent mem object attached.
bool Buffer::isSubBuffer() {
    return nullptr != this->associatedMemObject;
}
// A sub-buffer origin must be 4-byte aligned; for compressed parent buffers
// the spec additionally requires alignment to CL_DEVICE_MEM_BASE_ADDR_ALIGN
// (the device reports it in bits, hence the division by 8).
bool Buffer::isValidSubBufferOffset(size_t offset) {
    auto parentAllocationType = this->getGraphicsAllocation()->getAllocationType();
    if (GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == parentAllocationType) {
        // From spec: "origin value is aligned to the CL_DEVICE_MEM_BASE_ADDR_ALIGN value"
        auto baseAddressAlignInBytes = this->getContext()->getDevice(0)->getDeviceInfo().memBaseAddressAlign / 8u;
        if (!isAligned(offset, baseAddressAlignInBytes)) {
            return false;
        }
    }
    constexpr cl_uint addressAlignInBits = 32; // 4 byte alignment
    return (offset & (addressAlignInBits / 8 - 1)) == 0;
}
// Entry point used by clCreateBuffer-style APIs: validates the context,
// flags, size and host pointer, then delegates to create(). On failure
// retVal carries the CL error and buffer is left untouched.
void Buffer::validateInputAndCreateBuffer(cl_context &context,
                                          MemoryPropertiesFlags memoryProperties,
                                          cl_mem_flags flags,
                                          cl_mem_flags_intel flagsIntel,
                                          size_t size,
                                          void *hostPtr,
                                          cl_int &retVal,
                                          cl_mem &buffer) {
    Context *pContext = nullptr;
    retVal = validateObjects(WithCastToInternal(context, &pContext));
    if (retVal != CL_SUCCESS) {
        return;
    }
    if (!MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel)) {
        retVal = CL_INVALID_VALUE;
        return;
    }
    auto pDevice = pContext->getDevice(0);
    // Either flag field may lift the max-allocation-size restriction.
    bool allowCreateBuffersWithUnrestrictedSize = isValueSet(flags, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) ||
                                                  isValueSet(flagsIntel, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL);
    if (size == 0 || (size > pDevice->getHardwareCapabilities().maxMemAllocSize && !allowCreateBuffersWithUnrestrictedSize)) {
        retVal = CL_INVALID_BUFFER_SIZE;
        return;
    }
    /* Check the host ptr and data */
    // hostPtr must be present exactly when COPY_HOST_PTR/USE_HOST_PTR is set.
    bool expectHostPtr = (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) != 0;
    if ((hostPtr == nullptr) == expectHostPtr) {
        retVal = CL_INVALID_HOST_PTR;
        return;
    }
    // create the buffer
    buffer = create(pContext, memoryProperties, flags, flagsIntel, size, hostPtr, retVal);
}
// Convenience overload: derives MemoryPropertiesFlags from the legacy
// cl_mem_flags value and forwards to the full create() implementation.
Buffer *Buffer::create(Context *context,
                       cl_mem_flags flags,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    auto memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0);
    return create(context, memoryProperties, flags, 0, size, hostPtr, errcodeRet);
}
// Full buffer creation path. Selects an allocation type, classifies the
// host pointer (alignment / SVM reuse), allocates GPU memory (with a
// host-memory fallback for read-only USE_HOST_PTR buffers), constructs the
// HW-specific Buffer object and performs the initial host->device copy when
// required. Returns nullptr and sets errcodeRet on failure.
Buffer *Buffer::create(Context *context,
                       MemoryPropertiesFlags memoryProperties,
                       cl_mem_flags flags,
                       cl_mem_flags_intel flagsIntel,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    Buffer *pBuffer = nullptr;
    errcodeRet = CL_SUCCESS;
    GraphicsAllocation *memory = nullptr;
    GraphicsAllocation *mapAllocation = nullptr;
    bool zeroCopyAllowed = true;
    bool isHostPtrSVM = false;
    bool alignementSatisfied = true;
    bool allocateMemory = true;
    bool copyMemoryFromHostPtr = false;
    auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex();
    MemoryManager *memoryManager = context->getMemoryManager();
    UNRECOVERABLE_IF(!memoryManager);
    // Decide BUFFER / BUFFER_COMPRESSED / BUFFER_HOST_MEMORY based on context,
    // platform compression support and local-memory availability.
    GraphicsAllocation::AllocationType allocationType = getGraphicsAllocationType(
        memoryProperties,
        *context,
        HwHelper::renderCompressedBuffersSupported(context->getDevice(0)->getHardwareInfo()),
        memoryManager->isLocalMemorySupported(rootDeviceIndex),
        HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily).obtainRenderBufferCompressionPreference(context->getDevice(0)->getHardwareInfo(), size));
    // Validate hostPtr vs flags and detect misaligned USE_HOST_PTR inputs.
    checkMemory(memoryProperties, size, hostPtr, errcodeRet, alignementSatisfied, copyMemoryFromHostPtr, memoryManager);
    if (errcodeRet != CL_SUCCESS) {
        return nullptr;
    }
    // Compressed buffers always get their own allocation; no zero copy.
    if (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
        zeroCopyAllowed = false;
        allocateMemory = true;
    }
    // Host-memory buffers can be zero-copy only when the user pointer is
    // cacheline aligned.
    if (allocationType == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY) {
        if (memoryProperties.flags.useHostPtr) {
            if (alignementSatisfied) {
                allocateMemory = false;
                zeroCopyAllowed = true;
            } else {
                zeroCopyAllowed = false;
                allocateMemory = true;
            }
        }
    }
    if (memoryProperties.flags.useHostPtr) {
        if (DebugManager.flags.DisableZeroCopyForUseHostPtr.get()) {
            zeroCopyAllowed = false;
            allocateMemory = true;
        }
        // If hostPtr is an SVM allocation, reuse its GPU allocation instead of
        // allocating new memory.
        auto svmManager = context->getSVMAllocsManager();
        if (svmManager) {
            auto svmData = svmManager->getSVMAlloc(hostPtr);
            if (svmData) {
                memory = svmData->gpuAllocation;
                allocationType = memory->getAllocationType();
                isHostPtrSVM = true;
                zeroCopyAllowed = memory->getAllocationType() == GraphicsAllocation::AllocationType::SVM_ZERO_COPY;
                copyMemoryFromHostPtr = false;
                allocateMemory = false;
                mapAllocation = svmData->cpuAllocation;
            }
        }
    }
    // Shared contexts always use the host pointer directly.
    if (context->isSharedContext) {
        zeroCopyAllowed = true;
        copyMemoryFromHostPtr = false;
        allocateMemory = false;
    }
    if (hostPtr && context->isProvidingPerformanceHints()) {
        if (zeroCopyAllowed) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size);
        } else {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, hostPtr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
        }
    }
    if (DebugManager.flags.DisableZeroCopyForBuffers.get()) {
        zeroCopyAllowed = false;
    }
    if (allocateMemory && context->isProvidingPerformanceHints()) {
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
    }
    // No SVM reuse: allocate (or wrap hostPtr in) a graphics allocation.
    if (!memory) {
        AllocationProperties allocProperties = MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryProperties, allocateMemory, size, allocationType, context->areMultiStorageAllocationsPreferred());
        memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr);
    }
    if (allocateMemory && memory && MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) {
        memoryManager->addAllocationToHostPtrManager(memory);
    }
    //if allocation failed for CL_MEM_USE_HOST_PTR case retry with non zero copy path
    if (memoryProperties.flags.useHostPtr && !memory && Buffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)) {
        allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
        zeroCopyAllowed = false;
        copyMemoryFromHostPtr = true;
        AllocationProperties allocProperties = MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryProperties, true, size, allocationType, context->areMultiStorageAllocationsPreferred());
        memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
    }
    if (!memory) {
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        return nullptr;
    }
    // Non-system pools cannot alias the host pointer: force a copy instead.
    if (!MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) {
        zeroCopyAllowed = false;
        if (hostPtr) {
            if (!isHostPtrSVM) {
                copyMemoryFromHostPtr = true;
            }
        }
    } else if (allocationType == GraphicsAllocation::AllocationType::BUFFER) {
        allocationType = GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
    }
    memory->setAllocationType(allocationType);
    memory->setMemObjectsAllocationWithWritableFlags(!(memoryProperties.flags.readOnly || memoryProperties.flags.hostReadOnly || memoryProperties.flags.hostNoAccess));
    pBuffer = createBufferHw(context,
                             memoryProperties,
                             flags,
                             flagsIntel,
                             size,
                             memory->getUnderlyingBuffer(),
                             (memoryProperties.flags.useHostPtr) ? hostPtr : nullptr,
                             memory,
                             zeroCopyAllowed,
                             isHostPtrSVM,
                             false);
    if (!pBuffer) {
        // Roll back the allocation if the HW object could not be constructed.
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        memoryManager->removeAllocationFromHostPtrManager(memory);
        memoryManager->freeGraphicsMemory(memory);
        return nullptr;
    }
    printDebugString(DebugManager.flags.LogMemoryObject.get(), stdout,
                     "\nCreated Buffer: Handle %p, hostPtr %p, size %llu, memoryStorage %p, GPU address %#llx, memoryPool:%du\n",
                     pBuffer, hostPtr, size, memory->getUnderlyingBuffer(), memory->getGpuAddress(), memory->getMemoryPool());
    // Non-zero-copy USE_HOST_PTR buffers get a separate MAP_ALLOCATION that
    // wraps the user pointer for map/unmap transfers.
    if (memoryProperties.flags.useHostPtr) {
        if (!zeroCopyAllowed && !isHostPtrSVM) {
            AllocationProperties properties{rootDeviceIndex, false, size, GraphicsAllocation::AllocationType::MAP_ALLOCATION, false};
            properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
            mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
        }
    }
    Buffer::provideCompressionHint(allocationType, context, pBuffer);
    pBuffer->mapAllocation = mapAllocation;
    pBuffer->setHostPtrMinSize(size);
    if (copyMemoryFromHostPtr) {
        auto gmm = memory->getDefaultGmm();
        // Compressed or non-system memory must be written through the GPU:
        // try blit first, fall back to an enqueueWriteBuffer on the special queue.
        bool gpuCopyRequired = (gmm && gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(memory->getMemoryPool());
        if (gpuCopyRequired) {
            auto blitMemoryToAllocationResult = context->blitMemoryToAllocation(*pBuffer, memory, hostPtr, size);
            if (blitMemoryToAllocationResult != BlitOperationResult::Success) {
                auto cmdQ = context->getSpecialQueue();
                if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0, size, hostPtr, nullptr, 0, nullptr, nullptr)) {
                    errcodeRet = CL_OUT_OF_RESOURCES;
                }
            }
        } else {
            memcpy_s(memory->getUnderlyingBuffer(), size, hostPtr, size);
        }
    }
    if (errcodeRet != CL_SUCCESS) {
        pBuffer->release();
        return nullptr;
    }
    if (DebugManager.flags.MakeAllBuffersResident.get()) {
        auto graphicsAllocation = pBuffer->getGraphicsAllocation();
        context->getDevice(0u)->getRootDeviceEnvironment().memoryOperationsInterface->makeResident(ArrayRef<GraphicsAllocation *>(&graphicsAllocation, 1));
    }
    return pBuffer;
}
// Wraps an externally shared allocation (e.g. GL/DX interop) in a Buffer and
// attaches the sharing handler. NOTE(review): createBufferHw may return
// nullptr, which would be dereferenced here — confirm callers guarantee a
// valid factory entry.
Buffer *Buffer::createSharedBuffer(Context *context, cl_mem_flags flags, SharingHandler *sharingHandler,
                                   GraphicsAllocation *graphicsAllocation) {
    auto sharedBuffer = createBufferHw(context, MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0), flags, 0, graphicsAllocation->getUnderlyingBufferSize(), nullptr, nullptr, graphicsAllocation, false, false, false);
    sharedBuffer->setSharingHandler(sharingHandler);
    return sharedBuffer;
}
// Validates the hostPtr/flags combination and classifies the pointer:
// - errcodeRet: CL_INVALID_HOST_PTR when the pointer and flags disagree.
// - alignementSatisfied: false when a USE_HOST_PTR pointer/size is not
//   cacheline aligned or lies below the allocator's minimum address.
// - copyMemoryFromHostPtr: true when the data must be copied into a
//   driver-owned allocation.
void Buffer::checkMemory(MemoryPropertiesFlags memoryProperties,
                         size_t size,
                         void *hostPtr,
                         cl_int &errcodeRet,
                         bool &alignementSatisfied,
                         bool &copyMemoryFromHostPtr,
                         MemoryManager *memoryManager) {
    errcodeRet = CL_SUCCESS;
    alignementSatisfied = true;
    copyMemoryFromHostPtr = false;
    uintptr_t minAddress = 0;
    auto memRestrictions = memoryManager->getAlignedMallocRestrictions();
    if (memRestrictions) {
        minAddress = memRestrictions->minAddress;
    }
    // A host pointer is only legal with USE_HOST_PTR or COPY_HOST_PTR.
    if (hostPtr) {
        if (!(memoryProperties.flags.useHostPtr || memoryProperties.flags.copyHostPtr)) {
            errcodeRet = CL_INVALID_HOST_PTR;
            return;
        }
    }
    if (memoryProperties.flags.useHostPtr) {
        if (hostPtr) {
            // Reject pointers that already belong to a driver allocation.
            auto fragment = memoryManager->getHostPtrManager()->getFragment(hostPtr);
            if (fragment && fragment->driverAllocation) {
                errcodeRet = CL_INVALID_HOST_PTR;
                return;
            }
            if (alignUp(hostPtr, MemoryConstants::cacheLineSize) != hostPtr ||
                alignUp(size, MemoryConstants::cacheLineSize) != size ||
                minAddress > reinterpret_cast<uintptr_t>(hostPtr)) {
                alignementSatisfied = false;
                copyMemoryFromHostPtr = true;
            }
        } else {
            errcodeRet = CL_INVALID_HOST_PTR;
        }
    }
    if (memoryProperties.flags.copyHostPtr) {
        if (hostPtr) {
            copyMemoryFromHostPtr = true;
        } else {
            errcodeRet = CL_INVALID_HOST_PTR;
        }
    }
    return;
}
// Chooses the allocation type for a new buffer. Shared contexts, forced
// shared physical memory, and USE_HOST_PTR without local memory all pin the
// buffer in host memory; otherwise compression is used when suitable.
GraphicsAllocation::AllocationType Buffer::getGraphicsAllocationType(const MemoryPropertiesFlags &properties, Context &context,
                                                                     bool renderCompressedBuffers, bool isLocalMemoryEnabled,
                                                                     bool preferCompression) {
    const bool forceHostMemory = context.isSharedContext || properties.flags.forceSharedPhysicalMemory;
    const bool hostPtrWithoutLocalMemory = properties.flags.useHostPtr && !isLocalMemoryEnabled;
    if (forceHostMemory || hostPtrWithoutLocalMemory) {
        return GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY;
    }
    return MemObjHelper::isSuitableForRenderCompression(renderCompressedBuffers, properties, context, preferCompression)
               ? GraphicsAllocation::AllocationType::BUFFER_COMPRESSED
               : GraphicsAllocation::AllocationType::BUFFER;
}
// Read-only memory is permitted when the host will not write (no access or
// read-only) and the kernel will only read.
bool Buffer::isReadOnlyMemoryPermittedByFlags(const MemoryPropertiesFlags &properties) {
    const bool hostWillNotWrite = properties.flags.hostNoAccess || properties.flags.hostReadOnly;
    return hostWillNotWrite && properties.flags.readOnly;
}
// Creates a sub-buffer that shares this buffer's graphics allocation,
// offset by region->origin. The parent's refcount is bumped so it outlives
// the sub-buffer. NOTE(review): memoryProperties is built from flagsIntel
// but 0 is forwarded as the flagsIntel argument to createFunction — confirm
// this is intentional.
Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
                                cl_mem_flags_intel flagsIntel,
                                const cl_buffer_region *region,
                                cl_int &errcodeRet) {
    DEBUG_BREAK_IF(nullptr == createFunction);
    MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0);
    auto buffer = createFunction(this->context, memoryProperties, flags, 0, region->size,
                                 ptrOffset(this->memoryStorage, region->origin),
                                 this->hostPtr ? ptrOffset(this->hostPtr, region->origin) : nullptr,
                                 this->graphicsAllocation,
                                 this->isZeroCopy, this->isHostPtrSVM, false);
    if (this->context->isProvidingPerformanceHints()) {
        this->context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, SUBBUFFER_SHARES_MEMORY, static_cast<cl_mem>(this));
    }
    buffer->associatedMemObject = this;
    buffer->offset = region->origin;
    buffer->setParentSharingHandler(this->getSharingHandler());
    this->incRefInternal(); // parent must stay alive while the sub-buffer exists
    errcodeRet = CL_SUCCESS;
    return buffer;
}
// Patches a stateless kernel argument with this buffer's GPU address
// (optionally the 32-bit patch address) plus the sub-buffer offset.
// Returns the patched address.
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing) {
    // Subbuffers have offset that graphicsAllocation is not aware of
    uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
    DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) ||
                     (this->getCpuAddress() == nullptr && this->getGraphicsAllocation()->peekSharedHandle())));
    patchWithRequiredSize(memory, patchSize, addressToPatch);
    return addressToPatch;
}
// Normalizes and validates pitches for rectangular buffer transfers:
// zero pitches are replaced by the tightly-packed defaults, then all pitches
// and the accessed region are checked against the buffer size.
// Returns false when any pitch is inconsistent or the region overflows.
bool Buffer::bufferRectPitchSet(const size_t *bufferOrigin,
                                const size_t *region,
                                size_t &bufferRowPitch,
                                size_t &bufferSlicePitch,
                                size_t &hostRowPitch,
                                size_t &hostSlicePitch) {
    // Fill in defaults: row pitch = region width, slice pitch = height * row.
    if (bufferRowPitch == 0)
        bufferRowPitch = region[0];
    if (bufferSlicePitch == 0)
        bufferSlicePitch = region[1] * bufferRowPitch;
    if (hostRowPitch == 0)
        hostRowPitch = region[0];
    if (hostSlicePitch == 0)
        hostSlicePitch = region[1] * hostRowPitch;
    // Row pitches must cover at least one row of the region.
    if (bufferRowPitch < region[0] ||
        hostRowPitch < region[0]) {
        return false;
    }
    // Slice pitches must cover the region and be whole multiples of the row pitch.
    if ((bufferSlicePitch < region[1] * bufferRowPitch || bufferSlicePitch % bufferRowPitch != 0) ||
        (hostSlicePitch < region[1] * hostRowPitch || hostSlicePitch % hostRowPitch != 0)) {
        return false;
    }
    // The last byte touched must lie within this buffer.
    if ((bufferOrigin[2] + region[2] - 1) * bufferSlicePitch + (bufferOrigin[1] + region[1] - 1) * bufferRowPitch + bufferOrigin[0] + region[0] > this->getSize()) {
        return false;
    }
    return true;
}
// Copies copySize bytes between dst and src, both displaced by copyOffset.
void Buffer::transferData(void *dst, void *src, size_t copySize, size_t copyOffset) {
    DBG_LOG(LogMemoryObject, __FUNCTION__, " hostPtr: ", hostPtr, ", size: ", copySize, ", offset: ", copyOffset, ", memoryStorage: ", memoryStorage);
    memcpy_s(ptrOffset(dst, copyOffset), copySize, ptrOffset(src, copyOffset), copySize);
}
// Copies from the buffer's storage into the user-provided host pointer.
void Buffer::transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) {
    this->transferData(this->hostPtr, this->memoryStorage, copySize[0], copyOffset[0]);
}
// Copies from the user-provided host pointer into the buffer's storage.
void Buffer::transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) {
    this->transferData(this->memoryStorage, this->hostPtr, copySize[0], copyOffset[0]);
}
// Number of host bytes a rectangular transfer touches: the byte offset of
// the region's first element plus the extent of the region itself.
size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch) {
    const size_t offsetInBytes = origin[0] + origin[1] * rowPitch + origin[2] * slicePitch;
    const size_t regionSizeInBytes = region[0] + rowPitch * (region[1] - 1) + slicePitch * (region[2] - 1);
    return offsetInBytes + regionSizeInBytes;
}
// CPU read/write is disallowed for buffers explicitly blocked from CPU copy,
// compressed buffers, and buffers imported through a sharing handle.
bool Buffer::isReadWriteOnCpuAllowed() {
    return !forceDisallowCPUCopy &&
           !this->isCompressed() &&
           (graphicsAllocation->peekSharedHandle() == 0);
}
// Heuristic: returns true when a host-side memcpy is expected to beat a GPU
// transfer for the given pointer and size.
bool Buffer::isReadWriteOnCpuPreffered(void *ptr, size_t size) {
    //if buffer is not zero copy and pointer is aligned it will be more beneficial to do the transfer on GPU
    if (!isMemObjZeroCopy() && (reinterpret_cast<uintptr_t>(ptr) & (MemoryConstants::cacheLineSize - 1)) == 0) {
        return false;
    }
    //on low power devices larger transfers are better on the GPU
    if (context->getDevice(0)->getDeviceInfo().platformLP && size > maxBufferSizeForReadWriteOnCpu) {
        return false;
    }
    //if we are not in System Memory Pool, it is more beneficial to do the transfer on GPU
    //for 32 bit applications, utilize CPU transfers here.
    if (!MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool()) && is64bit) {
        return false;
    }
    return true;
}
// Looks up the GFX-family-specific create function in the factory table and
// constructs the HW buffer; records the create function on the object so
// sub-buffers can reuse it. Returns nullptr if construction failed.
Buffer *Buffer::createBufferHw(Context *context,
                               MemoryPropertiesFlags memoryProperties,
                               cl_mem_flags flags,
                               cl_mem_flags_intel flagsIntel,
                               size_t size,
                               void *memoryStorage,
                               void *hostPtr,
                               GraphicsAllocation *gfxAllocation,
                               bool zeroCopy,
                               bool isHostPtrSVM,
                               bool isImageRedescribed) {
    const auto &hwInfo = context->getDevice(0)->getHardwareInfo();
    const auto createFunc = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction;
    DEBUG_BREAK_IF(nullptr == createFunc);
    auto buffer = createFunc(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation,
                             zeroCopy, isHostPtrSVM, isImageRedescribed);
    DEBUG_BREAK_IF(nullptr == buffer);
    if (buffer != nullptr) {
        buffer->createFunction = createFunc;
    }
    return buffer;
}
// Constructs a HW buffer directly from a device (no Context), used for
// internal surface-state setup. NOTE(review): unlike createBufferHw, the
// funcCreate result is dereferenced without a null check — confirm the
// factory entry is guaranteed here.
Buffer *Buffer::createBufferHwFromDevice(const ClDevice *device,
                                         cl_mem_flags flags,
                                         cl_mem_flags_intel flagsIntel,
                                         size_t size,
                                         void *memoryStorage,
                                         void *hostPtr,
                                         GraphicsAllocation *gfxAllocation,
                                         size_t offset,
                                         bool zeroCopy,
                                         bool isHostPtrSVM,
                                         bool isImageRedescribed) {
    const auto &hwInfo = device->getHardwareInfo();
    auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction;
    DEBUG_BREAK_IF(nullptr == funcCreate);
    MemoryPropertiesFlags memoryProperties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, flagsIntel, 0);
    auto pBuffer = funcCreate(nullptr, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation,
                              zeroCopy, isHostPtrSVM, isImageRedescribed);
    pBuffer->offset = offset;
    // No context is available, so the execution environment comes from the device.
    pBuffer->executionEnvironment = device->getExecutionEnvironment();
    return pBuffer;
}
// Selects the MOCS (memory object control state) index for surface-state
// programming: the cached OCL_BUFFER setting when L3 is allowed and the
// buffer is cacheline-aligned, read-only, or non-zero-copy; otherwise the
// cacheline-misaligned (uncached) setting.
uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const {
    uint64_t bufferAddress = 0;
    size_t bufferSize = 0;
    if (getGraphicsAllocation()) {
        bufferAddress = getGraphicsAllocation()->getGpuAddress();
        bufferSize = getGraphicsAllocation()->getUnderlyingBufferSize();
    } else {
        // Host-pointer-only surface: use the CPU address and mem-object size.
        bufferAddress = reinterpret_cast<uint64_t>(getHostPtr());
        bufferSize = getSize();
    }
    bufferAddress += this->offset; // account for sub-buffer displacement
    bool readOnlyMemObj = isValueSet(getMemoryPropertiesFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
    bool alignedMemObj = isAligned<MemoryConstants::cacheLineSize>(bufferAddress) &&
                         isAligned<MemoryConstants::cacheLineSize>(bufferSize);
    auto gmmHelper = executionEnvironment->getGmmHelper();
    if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
        return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    } else {
        return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    }
}
// A buffer is compressed when its GMM says so, or — lacking a GMM — when the
// allocation was created as BUFFER_COMPRESSED.
bool Buffer::isCompressed() const {
    auto allocation = this->getGraphicsAllocation();
    if (auto gmm = allocation->getDefaultGmm()) {
        return gmm->isRenderCompressed;
    }
    return GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == allocation->getAllocationType();
}
// Programs a RENDER_SURFACE_STATE for an arbitrary SVM/host range by building
// a throwaway HW buffer around it and delegating to setArgStateful.
void Buffer::setSurfaceState(const ClDevice *device,
                             void *surfaceState,
                             size_t svmSize,
                             void *svmPtr,
                             size_t offset,
                             GraphicsAllocation *gfxAlloc,
                             cl_mem_flags flags,
                             cl_mem_flags_intel flagsIntel) {
    auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, gfxAlloc, offset, true, false, false);
    buffer->setArgStateful(surfaceState, false, false, false, false);
    // Detach the caller-owned allocation before deleting the temporary buffer
    // so the destructor does not free it.
    buffer->graphicsAllocation = nullptr;
    delete buffer;
}
// Emits a diagnostics hint stating whether the buffer ended up compressed.
// Only fires when the app enabled performance hints and the platform supports
// render-compressed buffers at all.
void Buffer::provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
                                    Context *context,
                                    Buffer *buffer) {
    if (!context->isProvidingPerformanceHints() ||
        !HwHelper::renderCompressedBuffersSupported(context->getDevice(0)->getHardwareInfo())) {
        return;
    }
    const bool compressed = (allocationType == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
    if (compressed) {
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_COMPRESSED, buffer);
    } else {
        context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_NOT_COMPRESSED, buffer);
    }
}
} // namespace NEO

View File

@@ -0,0 +1,225 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/helpers/basic_math.h"
#include "core/memory_manager/memory_constants.h"
#include "context/context_type.h"
#include "extensions/public/cl_ext_private.h"
#include "igfxfmid.h"
#include "mem_obj/mem_obj.h"
#include "memory_properties_flags.h"
namespace NEO {
class Buffer;
class ClDevice;
class MemoryManager;
// Signature of a GFX-family-specific buffer factory function
// (BufferHw<GfxFamily>::create matches this).
typedef Buffer *(*BufferCreatFunc)(Context *context,
                                   MemoryPropertiesFlags memoryProperties,
                                   cl_mem_flags flags,
                                   cl_mem_flags_intel flagsIntel,
                                   size_t size,
                                   void *memoryStorage,
                                   void *hostPtr,
                                   GraphicsAllocation *gfxAllocation,
                                   bool zeroCopy,
                                   bool isHostPtrSVM,
                                   bool isImageRedescribed);
// One factory entry per core family.
typedef struct {
    BufferCreatFunc createBufferFunction;
} BufferFuncs;
// Defined in buffer.cpp; populated by populateFactoryTable specializations.
extern BufferFuncs bufferFactory[IGFX_MAX_CORE];
// OpenCL buffer memory object. Concrete per-GFX-family behavior lives in
// BufferHw<GfxFamily>; instances are produced through the bufferFactory table.
class Buffer : public MemObj {
  public:
    // Above this size, CPU read/write is not preferred on low-power platforms.
    constexpr static size_t maxBufferSizeForReadWriteOnCpu = 10 * MB;
    constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
    // Buffer-specific magic: MemObj magic with an extra bit set.
    constexpr static cl_ulong objectMagic = MemObj::objectMagic | 0x02;
    bool forceDisallowCPUCopy = false;
    ~Buffer() override;
    // API-level entry: validates context/flags/size/hostPtr, then creates.
    static void validateInputAndCreateBuffer(cl_context &context,
                                             MemoryPropertiesFlags memoryProperties,
                                             cl_mem_flags flags,
                                             cl_mem_flags_intel flagsIntel,
                                             size_t size,
                                             void *hostPtr,
                                             cl_int &retVal,
                                             cl_mem &buffer);
    // Convenience overload deriving properties from cl_mem_flags.
    static Buffer *create(Context *context,
                          cl_mem_flags flags,
                          size_t size,
                          void *hostPtr,
                          cl_int &errcodeRet);
    // Full creation path: allocation, SVM reuse, initial copy.
    static Buffer *create(Context *context,
                          MemoryPropertiesFlags properties,
                          cl_mem_flags flags,
                          cl_mem_flags_intel flagsIntel,
                          size_t size,
                          void *hostPtr,
                          cl_int &errcodeRet);
    // Wraps an externally shared allocation (interop) in a Buffer.
    static Buffer *createSharedBuffer(Context *context,
                                      cl_mem_flags flags,
                                      SharingHandler *sharingHandler,
                                      GraphicsAllocation *graphicsAllocation);
    // Dispatches to the GFX-family factory function.
    static Buffer *createBufferHw(Context *context,
                                  MemoryPropertiesFlags memoryProperties,
                                  cl_mem_flags flags,
                                  cl_mem_flags_intel flagsIntel,
                                  size_t size,
                                  void *memoryStorage,
                                  void *hostPtr,
                                  GraphicsAllocation *gfxAllocation,
                                  bool zeroCopy,
                                  bool isHostPtrSVM,
                                  bool isImageRedescribed);
    // Context-less construction from a device (internal surface-state use).
    static Buffer *createBufferHwFromDevice(const ClDevice *device,
                                            cl_mem_flags flags,
                                            cl_mem_flags_intel flagsIntel,
                                            size_t size,
                                            void *memoryStorage,
                                            void *hostPtr,
                                            GraphicsAllocation *gfxAllocation,
                                            size_t offset,
                                            bool zeroCopy,
                                            bool isHostPtrSVM,
                                            bool isImageRedescribed);
    // Creates a sub-buffer sharing this buffer's allocation at region->origin.
    Buffer *createSubBuffer(cl_mem_flags flags,
                            cl_mem_flags_intel flagsIntel,
                            const cl_buffer_region *region,
                            cl_int &errcodeRet);
    // Programs a RENDER_SURFACE_STATE for an arbitrary SVM/host range.
    static void setSurfaceState(const ClDevice *device,
                                void *surfaceState,
                                size_t svmSize,
                                void *svmPtr,
                                size_t offset,
                                GraphicsAllocation *gfxAlloc,
                                cl_mem_flags flags,
                                cl_mem_flags_intel flagsIntel);
    // Emits a diagnostics hint about whether the buffer is compressed.
    static void provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
                                       Context *context,
                                       Buffer *buffer);
    // Factory function recorded at creation; reused for sub-buffers.
    BufferCreatFunc createFunction = nullptr;
    bool isSubBuffer();
    bool isValidSubBufferOffset(size_t offset);
    uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); }
    uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing);
    // Implemented per GFX family: programs the surface state for this buffer.
    virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly) = 0;
    // Normalizes/validates pitches for rectangular transfers.
    bool bufferRectPitchSet(const size_t *bufferOrigin,
                            const size_t *region,
                            size_t &bufferRowPitch,
                            size_t &bufferSlicePitch,
                            size_t &hostRowPitch,
                            size_t &hostSlicePitch);
    static size_t calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch);
    void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override;
    void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override;
    bool isReadWriteOnCpuAllowed();
    bool isReadWriteOnCpuPreffered(void *ptr, size_t size);
    uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const;
    bool isCompressed() const;
  protected:
    Buffer(Context *context,
           MemoryPropertiesFlags memoryProperties,
           cl_mem_flags flags,
           cl_mem_flags_intel flagsIntel,
           size_t size,
           void *memoryStorage,
           void *hostPtr,
           GraphicsAllocation *gfxAllocation,
           bool zeroCopy,
           bool isHostPtrSVM,
           bool isObjectRedescribed);
    Buffer();
    // Validates hostPtr/flags; reports alignment and copy requirements.
    static void checkMemory(MemoryPropertiesFlags memoryProperties,
                            size_t size,
                            void *hostPtr,
                            cl_int &errcodeRet,
                            bool &isZeroCopy,
                            bool &copyMemoryFromHostPtr,
                            MemoryManager *memMngr);
    static GraphicsAllocation::AllocationType getGraphicsAllocationType(const MemoryPropertiesFlags &properties, Context &context,
                                                                        bool renderCompressedBuffers, bool localMemoryEnabled,
                                                                        bool preferCompression);
    static bool isReadOnlyMemoryPermittedByFlags(const MemoryPropertiesFlags &properties);
    void transferData(void *dst, void *src, size_t copySize, size_t copyOffset);
};
// GFX-family-specific buffer implementation; provides the surface-state
// programming (setArgStateful and its append* hooks) for one hardware family.
template <typename GfxFamily>
class BufferHw : public Buffer {
  public:
    BufferHw(Context *context,
             MemoryPropertiesFlags memoryProperties,
             cl_mem_flags flags,
             cl_mem_flags_intel flagsIntel,
             size_t size,
             void *memoryStorage,
             void *hostPtr,
             GraphicsAllocation *gfxAllocation,
             bool zeroCopy,
             bool isHostPtrSVM,
             bool isObjectRedescribed)
        : Buffer(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, gfxAllocation,
                 zeroCopy, isHostPtrSVM, isObjectRedescribed) {}
    void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) override;
    // Platform-specific extra surface-state programming (see *_plus.inl files).
    void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument);
    void appendSurfaceStateExt(void *memory);
    // Factory function matching BufferCreatFunc; registered in bufferFactory.
    static Buffer *create(Context *context,
                          MemoryPropertiesFlags memoryProperties,
                          cl_mem_flags flags,
                          cl_mem_flags_intel flagsIntel,
                          size_t size,
                          void *memoryStorage,
                          void *hostPtr,
                          GraphicsAllocation *gfxAllocation,
                          bool zeroCopy,
                          bool isHostPtrSVM,
                          bool isObjectRedescribed) {
        auto buffer = new BufferHw<GfxFamily>(context,
                                              memoryProperties,
                                              flags,
                                              flagsIntel,
                                              size,
                                              memoryStorage,
                                              hostPtr,
                                              gfxAllocation,
                                              zeroCopy,
                                              isHostPtrSVM,
                                              isObjectRedescribed);
        buffer->surfaceType = SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D;
        return buffer;
    }
    typedef typename GfxFamily::RENDER_SURFACE_STATE SURFACE_STATE;
    typename SURFACE_STATE::SURFACE_TYPE surfaceType;
};
} // namespace NEO

View File

@@ -0,0 +1,85 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/execution_environment/execution_environment.h"
#include "core/gmm_helper/gmm.h"
#include "core/gmm_helper/resource_info.h"
#include "core/helpers/aligned_memory.h"
#include "core/helpers/bit_helpers.h"
#include "core/helpers/hw_cmds.h"
#include "buffer_ext.inl"
#include "helpers/surface_formats.h"
#include "mem_obj/buffer.h"
namespace NEO {
// Splits a (size - 1) buffer length into the Width/Height/Depth bitfields
// that RENDER_SURFACE_STATE uses to encode buffer extents.
union SURFACE_STATE_BUFFER_LENGTH {
    uint32_t Length;
    struct SurfaceState {
        uint32_t Width : BITFIELD_RANGE(0, 6);
        uint32_t Height : BITFIELD_RANGE(7, 20);
        uint32_t Depth : BITFIELD_RANGE(21, 31);
    } SurfaceState;
};
// Programs a RENDER_SURFACE_STATE describing this buffer: base address
// (4-byte aligned down, with the remainder folded into the size), RAW format,
// MOCS, and — for compressed allocations — CCS auxiliary-surface mode.
template <typename GfxFamily>
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument) {
    using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
    using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
    auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
    // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
    auto bufferAddress = (getGraphicsAllocation() != nullptr) ? getGraphicsAllocation()->getGpuAddress() : castToUint64(getHostPtr());
    bufferAddress += this->offset;
    // Align base down to 4 bytes and grow the size to cover the displaced bytes
    // (512-byte alignment when aux translation needs it).
    auto bufferAddressAligned = alignDown(bufferAddress, 4);
    auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned);
    auto surfaceSize = alignUp(getSize() + bufferOffset, alignSizeForAuxTranslation ? 512 : 4);
    // Hardware encodes (size - 1) split across the Width/Height/Depth fields.
    SURFACE_STATE_BUFFER_LENGTH Length = {0};
    Length.Length = static_cast<uint32_t>(surfaceSize - 1);
    surfaceState->setWidth(Length.SurfaceState.Width + 1);
    surfaceState->setHeight(Length.SurfaceState.Height + 1);
    surfaceState->setDepth(Length.SurfaceState.Depth + 1);
    // A zero base address means a null surface.
    if (bufferAddress != 0) {
        surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER);
    } else {
        surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL);
    }
    surfaceState->setSurfaceFormat(SURFACE_FORMAT::SURFACE_FORMAT_RAW);
    surfaceState->setSurfaceVerticalAlignment(RENDER_SURFACE_STATE::SURFACE_VERTICAL_ALIGNMENT_VALIGN_4);
    surfaceState->setSurfaceHorizontalAlignment(RENDER_SURFACE_STATE::SURFACE_HORIZONTAL_ALIGNMENT_HALIGN_4);
    surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
    surfaceState->setVerticalLineStride(0);
    surfaceState->setVerticalLineStrideOffset(0);
    surfaceState->setMemoryObjectControlState(getMocsValue(disableL3, isReadOnlyArgument));
    surfaceState->setSurfaceBaseAddress(bufferAddressAligned);
    Gmm *gmm = graphicsAllocation ? graphicsAllocation->getDefaultGmm() : nullptr;
    if (gmm && gmm->isRenderCompressed && !forceNonAuxMode &&
        GraphicsAllocation::AllocationType::BUFFER_COMPRESSED == graphicsAllocation->getAllocationType()) {
        // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios
        surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT);
        surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);
    } else {
        surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT);
        surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
    }
    appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument);
    appendSurfaceStateExt(memory);
}
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/buffer_base.inl"
namespace NEO {
// Default (BDW+) hook for family-specific additions to a buffer's
// RENDER_SURFACE_STATE, called at the end of setArgStateful.
// Intentionally a no-op here; platforms that need extra programming
// provide their own specialization/override of this template.
template <typename GfxFamily>
void BufferHw<GfxFamily>::appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnly) {
}
} // namespace NEO

View File

@@ -0,0 +1,12 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Registers this gfx family's buffer factory entry. The table is indexed by
// gfx core so runtime code can create the correct BufferHw specialization
// without knowing the concrete family type.
template <>
void populateFactoryTable<BufferHw<Family>>() {
    extern BufferFuncs bufferFactory[IGFX_MAX_CORE];
    bufferFactory[gfxCore].createBufferFunction = BufferHw<Family>::create;
}

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/buffer.h"
namespace NEO {
// Extension hook appended after the common buffer surface-state programming.
// Default implementation is a no-op; branch/platform-specific definitions of
// this .inl can add extra RENDER_SURFACE_STATE fields here.
template <typename GfxFamily>
void BufferHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
} // namespace NEO

View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/image.h"
namespace NEO {
// Extension hook appended after the common image surface-state programming
// (see ImageHw<GfxFamily>::setImageArg). Default implementation is a no-op;
// branch/platform-specific definitions of this .inl can extend it.
template <typename GfxFamily>
void ImageHw<GfxFamily>::appendSurfaceStateExt(void *memory) {
}
} // namespace NEO

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,355 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/helpers/string.h"
#include "core/image/image_surface_state.h"
#include "helpers/surface_formats.h"
#include "helpers/validators.h"
#include "mem_obj/buffer.h"
#include "mem_obj/mem_obj.h"
namespace NEO {
class Image;
struct KernelInfo;
struct SurfaceFormatInfo;
// Signature of a per-gfx-family image factory function; matches
// ImageHw<GfxFamily>::create and is registered via populateFactoryTable.
typedef Image *(*ImageCreatFunc)(Context *context,
                                 const MemoryPropertiesFlags &memoryProperties,
                                 uint64_t flags,
                                 uint64_t flagsIntel,
                                 size_t size,
                                 void *hostPtr,
                                 const cl_image_format &imageFormat,
                                 const cl_image_desc &imageDesc,
                                 bool zeroCopy,
                                 GraphicsAllocation *graphicsAllocation,
                                 bool isImageRedescribed,
                                 uint32_t baseMipLevel,
                                 uint32_t mipCount,
                                 const ClSurfaceFormatInfo *surfaceFormatInfo,
                                 const SurfaceOffsets *surfaceOffsets);

// One entry per gfx core in the image factory table.
typedef struct {
    ImageCreatFunc createImageFunction;
} ImageFuncs;
// OpenCL image memory object (cl_mem of a CL_MEM_OBJECT_IMAGE* type).
// Holds the CL-level descriptor/format plus the surface metadata
// (pitches, surface offsets, mip/cube state, MCS info) that the HW
// subclass (ImageHw) uses to program RENDER_SURFACE_STATE.
class Image : public MemObj {
  public:
    const static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
    // Magic value used by castToObject<> to validate a cl_mem as an Image.
    static const cl_ulong objectMagic = MemObj::objectMagic | 0x01;

    ~Image() override;

    // --- Factory entry points ---------------------------------------------

    // Creates an image whose properties have already been validated;
    // errcodeRet receives the CL error code on failure.
    static Image *create(Context *context,
                         const MemoryPropertiesFlags &memoryProperties,
                         cl_mem_flags flags,
                         cl_mem_flags_intel flagsIntel,
                         const ClSurfaceFormatInfo *surfaceFormat,
                         const cl_image_desc *imageDesc,
                         const void *hostPtr,
                         cl_int &errcodeRet);

    // Validates format/descriptor/host pointer, then delegates to create().
    static Image *validateAndCreateImage(Context *context,
                                         const MemoryPropertiesFlags &memoryProperties,
                                         cl_mem_flags flags,
                                         cl_mem_flags_intel flagsIntel,
                                         const cl_image_format *imageFormat,
                                         const cl_image_desc *imageDesc,
                                         const void *hostPtr,
                                         cl_int &errcodeRet);

    // Dispatches to the gfx-family factory table (see ImageCreatFunc).
    static Image *createImageHw(Context *context, const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags,
                                cl_mem_flags_intel flagsIntel, size_t size, void *hostPtr,
                                const cl_image_format &imageFormat, const cl_image_desc &imageDesc,
                                bool zeroCopy, GraphicsAllocation *graphicsAllocation,
                                bool isObjectRedescribed, uint32_t baseMipLevel, uint32_t mipCount, const ClSurfaceFormatInfo *surfaceFormatInfo = nullptr);

    // Wraps an externally shared allocation (e.g. GL/D3D interop) as an image.
    static Image *createSharedImage(Context *context, SharingHandler *sharingHandler, const McsSurfaceInfo &mcsSurfaceInfo,
                                    GraphicsAllocation *graphicsAllocation, GraphicsAllocation *mcsAllocation,
                                    cl_mem_flags flags, const ClSurfaceFormatInfo *surfaceFormat, ImageInfo &imgInfo, uint32_t cubeFaceIndex, uint32_t baseMipLevel, uint32_t mipCount);

    // --- Validation helpers (return CL error codes) -----------------------

    static cl_int validate(Context *context,
                           const MemoryPropertiesFlags &memoryProperties,
                           const ClSurfaceFormatInfo *surfaceFormat,
                           const cl_image_desc *imageDesc,
                           const void *hostPtr);
    static cl_int validateImageFormat(const cl_image_format *imageFormat);
    static int32_t validatePlanarYUV(Context *context,
                                     const MemoryPropertiesFlags &memoryProperties,
                                     const cl_image_desc *imageDesc,
                                     const void *hostPtr);
    static int32_t validatePackedYUV(const MemoryPropertiesFlags &memoryProperties, const cl_image_desc *imageDesc);
    static cl_int validateImageTraits(Context *context, const MemoryPropertiesFlags &memoryProperties, const cl_image_format *imageFormat, const cl_image_desc *imageDesc, const void *hostPtr);

    // --- Host-pointer geometry helpers ------------------------------------

    static size_t calculateHostPtrSize(const size_t *region, size_t rowPitch, size_t slicePitch, size_t pixelSize, uint32_t imageType);
    static void calculateHostPtrOffset(size_t *imageOffset, const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch, uint32_t imageType, size_t bytesPerPixel);

    static cl_int getImageParams(Context *context,
                                 cl_mem_flags flags,
                                 const ClSurfaceFormatInfo *surfaceFormat,
                                 const cl_image_desc *imageDesc,
                                 size_t *imageRowPitch,
                                 size_t *imageSlicePitch);

    // --- Image-type predicates and CL <-> core type conversions -----------

    static bool isImage1d(const cl_image_desc &imageDesc);
    static bool isImage2d(cl_mem_object_type imageType);
    static bool isImage2dOr2dArray(cl_mem_object_type imageType);
    static bool isDepthFormat(const cl_image_format &imageFormat);
    // True for image types that have a third (slice/array) dimension.
    static bool hasSlices(cl_mem_object_type type) {
        return (type == CL_MEM_OBJECT_IMAGE3D) || (type == CL_MEM_OBJECT_IMAGE1D_ARRAY) || (type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
    }

    static ImageType convertType(const cl_mem_object_type type);
    static cl_mem_object_type convertType(const ImageType type);
    static ImageDescriptor convertDescriptor(const cl_image_desc &imageDesc);
    static cl_image_desc convertDescriptor(const ImageDescriptor &imageDesc);

    // Implements clGetImageInfo for this object.
    cl_int getImageInfo(cl_image_info paramName,
                        size_t paramValueSize,
                        void *paramValue,
                        size_t *paramValueSizeRet);

    // --- HW surface-state programming (implemented per gfx family) --------

    virtual void setImageArg(void *memory, bool isMediaBlockImage, uint32_t mipLevel) = 0;
    virtual void setMediaImageArg(void *memory) = 0;
    virtual void setMediaSurfaceRotation(void *memory) = 0;
    virtual void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) = 0;

    const cl_image_desc &getImageDesc() const;
    const cl_image_format &getImageFormat() const;
    const ClSurfaceFormatInfo &getSurfaceFormatInfo() const;

    // Copies between the host pointer and the image's storage (MemObj hooks).
    void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override;
    void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override;

    static bool isFormatRedescribable(cl_image_format format);
    // Creates an alias of this image with a compatible raw format (used by
    // built-in copy/fill kernels).
    Image *redescribe();
    Image *redescribeFillImage();
    ImageCreatFunc createFunction;

    // --- Surface metadata accessors ---------------------------------------

    uint32_t getQPitch() { return qPitch; }
    void setQPitch(uint32_t qPitch) { this->qPitch = qPitch; }
    size_t getHostPtrRowPitch() const { return hostPtrRowPitch; }
    void setHostPtrRowPitch(size_t pitch) { this->hostPtrRowPitch = pitch; }
    size_t getHostPtrSlicePitch() const { return hostPtrSlicePitch; }
    void setHostPtrSlicePitch(size_t pitch) { this->hostPtrSlicePitch = pitch; }
    size_t getImageCount() const { return imageCount; }
    void setImageCount(size_t imageCount) { this->imageCount = imageCount; }
    void setImageRowPitch(size_t rowPitch) { imageDesc.image_row_pitch = rowPitch; }
    void setImageSlicePitch(size_t slicePitch) { imageDesc.image_slice_pitch = slicePitch; }
    void setSurfaceOffsets(uint64_t offset, uint32_t xOffset, uint32_t yOffset, uint32_t yOffsetForUVPlane) {
        surfaceOffsets.offset = offset;
        surfaceOffsets.xOffset = xOffset;
        surfaceOffsets.yOffset = yOffset;
        surfaceOffsets.yOffsetForUVplane = yOffsetForUVPlane;
    }
    void getSurfaceOffsets(SurfaceOffsets &surfaceOffsetsOut) { surfaceOffsetsOut = this->surfaceOffsets; }

    void setCubeFaceIndex(uint32_t index) { cubeFaceIndex = index; }
    uint32_t getCubeFaceIndex() { return cubeFaceIndex; }
    void setMediaPlaneType(cl_uint type) { mediaPlaneType = type; }
    cl_uint getMediaPlaneType() const { return mediaPlaneType; }
    int peekBaseMipLevel() { return baseMipLevel; }
    void setBaseMipLevel(int level) { this->baseMipLevel = level; }
    uint32_t peekMipCount() { return mipCount; }
    void setMipCount(uint32_t mipCountNew) { this->mipCount = mipCountNew; }

    static const ClSurfaceFormatInfo *getSurfaceFormatFromTable(cl_mem_flags flags, const cl_image_format *imageFormat, unsigned int clVersionSupport);
    static cl_int validateRegionAndOrigin(const size_t *origin, const size_t *region, const cl_image_desc &imgDesc);

    // Writes the Y and UV planes of an NV12 image from a host pointer.
    cl_int writeNV12Planes(const void *hostPtr, size_t hostPtrRowPitch);
    void setMcsSurfaceInfo(const McsSurfaceInfo &info) { mcsSurfaceInfo = info; }
    const McsSurfaceInfo &getMcsSurfaceInfo() { return mcsSurfaceInfo; }
    size_t calculateOffsetForMapping(const MemObjOffsetArray &origin) const override;

    // Rewrites an already-programmed surface state between 2D-array and 3D
    // layouts (per gfx family).
    virtual void transformImage2dArrayTo3d(void *memory) = 0;
    virtual void transformImage3dTo2dArray(void *memory) = 0;

    bool hasSameDescriptor(const cl_image_desc &imageDesc) const;
    bool hasValidParentImageFormat(const cl_image_format &imageFormat) const;

    bool isImageFromBuffer() const { return castToObject<Buffer>(static_cast<cl_mem>(associatedMemObject)) ? true : false; }
    bool isImageFromImage() const { return castToObject<Image>(static_cast<cl_mem>(associatedMemObject)) ? true : false; }

  protected:
    Image(Context *context,
          const MemoryPropertiesFlags &memoryProperties,
          cl_mem_flags flags,
          cl_mem_flags_intel flagsIntel,
          size_t size,
          void *hostPtr,
          cl_image_format imageFormat,
          const cl_image_desc &imageDesc,
          bool zeroCopy,
          GraphicsAllocation *graphicsAllocation,
          bool isObjectRedescribed,
          uint32_t baseMipLevel,
          uint32_t mipCount,
          const ClSurfaceFormatInfo &surfaceFormatInfo,
          const SurfaceOffsets *surfaceOffsets = nullptr);

    void getOsSpecificImageInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);

    // Generic row/slice-wise copy used by transferDataTo/FromHostPtr.
    void transferData(void *dst, size_t dstRowPitch, size_t dstSlicePitch,
                      void *src, size_t srcRowPitch, size_t srcSlicePitch,
                      std::array<size_t, 3> copyRegion, std::array<size_t, 3> copyOrigin);

    cl_image_format imageFormat;
    cl_image_desc imageDesc;
    ClSurfaceFormatInfo surfaceFormatInfo;
    McsSurfaceInfo mcsSurfaceInfo = {};
    uint32_t qPitch = 0;            // distance between array slices / 3D planes, in rows
    size_t hostPtrRowPitch = 0;
    size_t hostPtrSlicePitch = 0;
    size_t imageCount = 0;
    uint32_t cubeFaceIndex;
    cl_uint mediaPlaneType;
    SurfaceOffsets surfaceOffsets = {0};
    uint32_t baseMipLevel = 0;
    uint32_t mipCount = 1;

    // Per-channel-order format validators used by validateImageFormat.
    static bool isValidSingleChannelFormat(const cl_image_format *imageFormat);
    static bool isValidIntensityFormat(const cl_image_format *imageFormat);
    static bool isValidLuminanceFormat(const cl_image_format *imageFormat);
    static bool isValidDepthFormat(const cl_image_format *imageFormat);
    static bool isValidDoubleChannelFormat(const cl_image_format *imageFormat);
    static bool isValidTripleChannelFormat(const cl_image_format *imageFormat);
    static bool isValidRGBAFormat(const cl_image_format *imageFormat);
    static bool isValidSRGBFormat(const cl_image_format *imageFormat);
    static bool isValidARGBFormat(const cl_image_format *imageFormat);
    static bool isValidDepthStencilFormat(const cl_image_format *imageFormat);
    static bool isValidYUVFormat(const cl_image_format *imageFormat);
    static bool hasAlphaChannel(const cl_image_format *imageFormat);
};
// Gfx-family-specific image implementation: knows how to program this
// family's RENDER_SURFACE_STATE / MEDIA_SURFACE_STATE for the image.
template <typename GfxFamily>
class ImageHw : public Image {
    using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
    using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;

  public:
    ImageHw(Context *context,
            const MemoryPropertiesFlags &memoryProperties,
            cl_mem_flags flags,
            cl_mem_flags_intel flagsIntel,
            size_t size,
            void *hostPtr,
            const cl_image_format &imageFormat,
            const cl_image_desc &imageDesc,
            bool zeroCopy,
            GraphicsAllocation *graphicsAllocation,
            bool isObjectRedescribed,
            uint32_t baseMipLevel,
            uint32_t mipCount,
            const ClSurfaceFormatInfo &surfaceFormatInfo,
            const SurfaceOffsets *surfaceOffsets = nullptr)
        : Image(context, memoryProperties, flags, flagsIntel, size, hostPtr, imageFormat, imageDesc,
                zeroCopy, graphicsAllocation, isObjectRedescribed, baseMipLevel, mipCount, surfaceFormatInfo, surfaceOffsets) {
        // Depth is only meaningful for 3D images; zero it for every other type.
        if (getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D ||
            getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||
            getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D ||
            getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
            getImageDesc().image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
            this->imageDesc.image_depth = 0;
        }

        // Map the CL image type onto the HW surface type; unknown types fall
        // into the default label and are treated as 2D.
        switch (imageDesc.image_type) {
        case CL_MEM_OBJECT_IMAGE1D:
        case CL_MEM_OBJECT_IMAGE1D_BUFFER:
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
            surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_1D;
            break;
        default:
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
        case CL_MEM_OBJECT_IMAGE2D:
            surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_2D;
            break;
        case CL_MEM_OBJECT_IMAGE3D:
            surfaceType = RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_3D;
            break;
        }
    }

    // Surface-state programming (definitions in image.inl and per-gen .inls).
    void setImageArg(void *memory, bool setAsMediaBlockImage, uint32_t mipLevel) override;
    void setAuxParamsForMultisamples(RENDER_SURFACE_STATE *surfaceState);
    MOCKABLE_VIRTUAL void setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm);
    void setMediaImageArg(void *memory) override;
    void setMediaSurfaceRotation(void *memory) override;
    void setSurfaceMemoryObjectControlStateIndexToMocsTable(void *memory, uint32_t value) override;
    void appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState);
    void appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState);
    void appendSurfaceStateExt(void *memory);
    void transformImage2dArrayTo3d(void *memory) override;
    void transformImage3dTo2dArray(void *memory) override;

    // Factory function registered in the per-core image factory table
    // (signature must match ImageCreatFunc).
    static Image *create(Context *context,
                         const MemoryPropertiesFlags &memoryProperties,
                         cl_mem_flags flags,
                         cl_mem_flags_intel flagsIntel,
                         size_t size,
                         void *hostPtr,
                         const cl_image_format &imageFormat,
                         const cl_image_desc &imageDesc,
                         bool zeroCopy,
                         GraphicsAllocation *graphicsAllocation,
                         bool isObjectRedescribed,
                         uint32_t baseMipLevel,
                         uint32_t mipCount,
                         const ClSurfaceFormatInfo *surfaceFormatInfo,
                         const SurfaceOffsets *surfaceOffsets) {
        UNRECOVERABLE_IF(surfaceFormatInfo == nullptr);
        return new ImageHw<GfxFamily>(context,
                                      memoryProperties,
                                      flags,
                                      flagsIntel,
                                      size,
                                      hostPtr,
                                      imageFormat,
                                      imageDesc,
                                      zeroCopy,
                                      graphicsAllocation,
                                      isObjectRedescribed,
                                      baseMipLevel,
                                      mipCount,
                                      *surfaceFormatInfo,
                                      surfaceOffsets);
    }

    // Forces shader channels that do not exist in the image's channel order
    // to read ZERO; channels present in the order pass through unchanged.
    static int getShaderChannelValue(int inputShaderChannel, cl_channel_order imageChannelOrder) {
        if (imageChannelOrder == CL_A) {
            if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED ||
                inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN ||
                inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) {
                return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO;
            }
        } else if (imageChannelOrder == CL_R ||
                   imageChannelOrder == CL_RA ||
                   imageChannelOrder == CL_Rx) {
            if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN ||
                inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) {
                return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO;
            }
        } else if (imageChannelOrder == CL_RG ||
                   imageChannelOrder == CL_RGx) {
            if (inputShaderChannel == RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE) {
                return RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO;
            }
        }
        return inputShaderChannel;
    }
    typename RENDER_SURFACE_STATE::SURFACE_TYPE surfaceType;
};
} // namespace NEO

View File

@@ -0,0 +1,197 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "core/execution_environment/execution_environment.h"
#include "core/gmm_helper/gmm.h"
#include "core/gmm_helper/gmm_helper.h"
#include "core/gmm_helper/resource_info.h"
#include "core/helpers/aligned_memory.h"
#include "core/helpers/hw_cmds.h"
#include "helpers/surface_formats.h"
#include "image_ext.inl"
#include "mem_obj/image.h"
namespace NEO {
// Packs a buffer length (bytes - 1) into the Width/Height/Depth bitfields
// that RENDER_SURFACE_STATE uses for SURFTYPE_BUFFER surfaces: write the
// linear Length, then read the three bitfields back out.
union SURFACE_STATE_BUFFER_LENGTH {
    uint32_t Length;
    struct SurfaceState {
        uint32_t Width : BITFIELD_RANGE(0, 6);
        uint32_t Height : BITFIELD_RANGE(7, 20);
        uint32_t Depth : BITFIELD_RANGE(21, 31);
    } SurfaceState;
};
// Programs this image's RENDER_SURFACE_STATE for a kernel image argument.
// memory               - destination surface-state slot
// setAsMediaBlockImage - widen Width to dword units for media-block reads
// mipLevel             - mip level to expose (added to baseMipLevel)
template <typename GfxFamily>
void ImageHw<GfxFamily>::setImageArg(void *memory, bool setAsMediaBlockImage, uint32_t mipLevel) {
    auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
    auto gmm = getGraphicsAllocation()->getDefaultGmm();
    auto gmmHelper = executionEnvironment->getGmmHelper();
    auto imageDescriptor = Image::convertDescriptor(getImageDesc());
    ImageInfo imgInfo;
    imgInfo.imgDesc = imageDescriptor;
    imgInfo.qPitch = qPitch;
    imgInfo.surfaceFormat = &getSurfaceFormatInfo().surfaceFormat;

    // Reuse the gmm fetched above rather than querying the allocation a second time.
    setImageSurfaceState<GfxFamily>(surfaceState, imgInfo, gmm, *gmmHelper, cubeFaceIndex, getGraphicsAllocation()->getGpuAddress(), surfaceOffsets, IsNV12Image(&this->getImageFormat()));

    if (getImageDesc().image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
        // image1d_buffer is image1d created from buffer. The length of buffer could be larger
        // than the maximal image width. Mock image1d_buffer with SURFACE_TYPE_SURFTYPE_BUFFER.
        SURFACE_STATE_BUFFER_LENGTH Length = {0};
        Length.Length = static_cast<uint32_t>(getImageDesc().image_width - 1);

        surfaceState->setWidth(static_cast<uint32_t>(Length.SurfaceState.Width + 1));
        surfaceState->setHeight(static_cast<uint32_t>(Length.SurfaceState.Height + 1));
        surfaceState->setDepth(static_cast<uint32_t>(Length.SurfaceState.Depth + 1));
        surfaceState->setSurfacePitch(static_cast<uint32_t>(getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes));
        surfaceState->setSurfaceType(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER);
    } else {
        setImageSurfaceStateDimensions<GfxFamily>(surfaceState, imgInfo, cubeFaceIndex, surfaceType);
        if (setAsMediaBlockImage) {
            // Media-block reads address the surface in dword granularity.
            uint32_t elSize = static_cast<uint32_t>(getSurfaceFormatInfo().surfaceFormat.ImageElementSizeInBytes);
            surfaceState->setWidth(static_cast<uint32_t>((getImageDesc().image_width * elSize) / sizeof(uint32_t)));
        }
    }

    surfaceState->setSurfaceMinLod(this->baseMipLevel + mipLevel);
    surfaceState->setMipCountLod((this->mipCount > 0) ? (this->mipCount - 1) : 0);
    setMipTailStartLod<GfxFamily>(surfaceState, gmm);

    // Route shader channel selects: channels absent from the image's channel
    // order are forced to ZERO (see getShaderChannelValue); LUMINANCE
    // replicates RED into GREEN/BLUE.
    cl_channel_order imgChannelOrder = getSurfaceFormatInfo().OCLImageFormat.image_channel_order;
    int shaderChannelValue = ImageHw<GfxFamily>::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, imgChannelOrder);
    surfaceState->setShaderChannelSelectRed(static_cast<typename RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT>(shaderChannelValue));

    if (imgChannelOrder == CL_LUMINANCE) {
        surfaceState->setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED);
        surfaceState->setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED);
    } else {
        shaderChannelValue = ImageHw<GfxFamily>::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_GREEN, imgChannelOrder);
        surfaceState->setShaderChannelSelectGreen(static_cast<typename RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT>(shaderChannelValue));
        shaderChannelValue = ImageHw<GfxFamily>::getShaderChannelValue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_BLUE, imgChannelOrder);
        surfaceState->setShaderChannelSelectBlue(static_cast<typename RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT>(shaderChannelValue));
    }

    // static_cast replaces the previous C-style cast; same value.
    surfaceState->setNumberOfMultisamples(static_cast<typename RENDER_SURFACE_STATE::NUMBER_OF_MULTISAMPLES>(mcsSurfaceInfo.multisampleCount));

    if (imageDesc.num_samples > 1) {
        setAuxParamsForMultisamples(surfaceState);
    } else if (gmm && gmm->isRenderCompressed) {
        setAuxParamsForCCS<GfxFamily>(surfaceState, gmm);
    }
    appendSurfaceStateDepthParams(surfaceState);
    appendSurfaceStateParams(surfaceState);
    appendSurfaceStateExt(surfaceState);
}
// Programs the auxiliary-surface fields for a multisampled image: prefers the
// unified MCS+CCS path when the MCS gmm supports it, then plain CCS, then the
// legacy MCS programming; depth surfaces without an MCS allocation get the
// DEPTH_STENCIL storage format instead.
template <typename GfxFamily>
void ImageHw<GfxFamily>::setAuxParamsForMultisamples(RENDER_SURFACE_STATE *surfaceState) {
    using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;

    if (getMcsAllocation()) {
        auto mcsGmm = getMcsAllocation()->getDefaultGmm();

        if (mcsGmm->unifiedAuxTranslationCapable() && mcsGmm->hasMultisampleControlSurface()) {
            setAuxParamsForMCSCCS(surfaceState, mcsGmm);
            surfaceState->setAuxiliarySurfacePitch(mcsGmm->getUnifiedAuxPitchTiles());
            surfaceState->setAuxiliarySurfaceQpitch(mcsGmm->getAuxQPitch());
            setClearColorParams<GfxFamily>(surfaceState, mcsGmm);
            setUnifiedAuxBaseAddress<GfxFamily>(surfaceState, mcsGmm);
        } else if (mcsGmm->unifiedAuxTranslationCapable()) {
            setAuxParamsForCCS<GfxFamily>(surfaceState, mcsGmm);
        } else {
            // Raw value 1 presumably selects the MCS aux mode; the enumerator
            // name differs between gfx families, hence the numeric literal —
            // TODO confirm against each family's RENDER_SURFACE_STATE enum.
            // static_cast replaces the previous C-style cast (same value).
            surfaceState->setAuxiliarySurfaceMode(static_cast<typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE>(1));
            surfaceState->setAuxiliarySurfacePitch(mcsSurfaceInfo.pitch);
            surfaceState->setAuxiliarySurfaceQpitch(mcsSurfaceInfo.qPitch);
            surfaceState->setAuxiliarySurfaceBaseAddress(mcsAllocation->getGpuAddress());
        }
    } else if (isDepthFormat(imageFormat) && surfaceState->getSurfaceFormat() != SURFACE_FORMAT::SURFACE_FORMAT_R32_FLOAT_X8X24_TYPELESS) {
        surfaceState->setMultisampledSurfaceStorageFormat(RENDER_SURFACE_STATE::MULTISAMPLED_SURFACE_STORAGE_FORMAT::MULTISAMPLED_SURFACE_STORAGE_FORMAT_DEPTH_STENCIL);
    }
}
// Per-family hook appended after the common image surface-state programming;
// the generic implementation is intentionally a no-op (gens that need extra
// fields specialize this template).
template <typename GfxFamily>
void ImageHw<GfxFamily>::appendSurfaceStateParams(RENDER_SURFACE_STATE *surfaceState) {
}
// Depth/stencil-specific surface-state additions; no-op generically and
// specialized where the HW has a depth-stencil resource bit (see the
// TGLLP+ .inl specialization).
template <typename GfxFamily>
inline void ImageHw<GfxFamily>::appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState) {
}
// Programs a MEDIA_SURFACE_STATE for this image (VME/media sampler usage).
// The surface is always programmed as Y8_UNORM_VA; NV12 images additionally
// get interleaved chroma with the UV-plane Y offset.
template <typename GfxFamily>
void ImageHw<GfxFamily>::setMediaImageArg(void *memory) {
    using MEDIA_SURFACE_STATE = typename GfxFamily::MEDIA_SURFACE_STATE;
    using SURFACE_FORMAT = typename MEDIA_SURFACE_STATE::SURFACE_FORMAT;
    SURFACE_FORMAT surfaceFormat = MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y8_UNORM_VA;

    auto gmmHelper = executionEnvironment->getGmmHelper();
    auto surfaceState = reinterpret_cast<MEDIA_SURFACE_STATE *>(memory);
    // Start from the family's default-initialized state, then override fields.
    *surfaceState = GfxFamily::cmdInitMediaSurfaceState;

    setMediaSurfaceRotation(reinterpret_cast<void *>(surfaceState));

    DEBUG_BREAK_IF(surfaceFormat == MEDIA_SURFACE_STATE::SURFACE_FORMAT_Y1_UNORM);

    surfaceState->setWidth(static_cast<uint32_t>(getImageDesc().image_width));

    surfaceState->setHeight(static_cast<uint32_t>(getImageDesc().image_height));
    surfaceState->setPictureStructure(MEDIA_SURFACE_STATE::PICTURE_STRUCTURE_FRAME_PICTURE);

    // Tile mode comes from the allocation's gmm resource info.
    auto gmm = getGraphicsAllocation()->getDefaultGmm();
    auto tileMode = static_cast<typename MEDIA_SURFACE_STATE::TILE_MODE>(gmm->gmmResourceInfo->getTileModeSurfaceState());

    surfaceState->setTileMode(tileMode);
    surfaceState->setSurfacePitch(static_cast<uint32_t>(getImageDesc().image_row_pitch));

    surfaceState->setSurfaceFormat(surfaceFormat);

    surfaceState->setHalfPitchForChroma(false);
    surfaceState->setInterleaveChroma(false);
    surfaceState->setXOffsetForUCb(0);
    surfaceState->setYOffsetForUCb(0);
    surfaceState->setXOffsetForVCr(0);
    surfaceState->setYOffsetForVCr(0);

    setSurfaceMemoryObjectControlStateIndexToMocsTable(
        reinterpret_cast<void *>(surfaceState),
        gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_IMAGE));

    if (IsNV12Image(&this->getImageFormat())) {
        // NV12: chroma is interleaved in a second plane below the Y plane.
        surfaceState->setInterleaveChroma(true);
        surfaceState->setYOffsetForUCb(this->surfaceOffsets.yOffsetForUVplane);
    }

    surfaceState->setVerticalLineStride(0);
    surfaceState->setVerticalLineStrideOffset(0);

    surfaceState->setSurfaceBaseAddress(getGraphicsAllocation()->getGpuAddress() + this->surfaceOffsets.offset);
}
// Rewrites an already-programmed surface state from a 2D-array view back to
// a true 3D view (only valid for images created as CL_MEM_OBJECT_IMAGE3D).
template <typename GfxFamily>
void ImageHw<GfxFamily>::transformImage2dArrayTo3d(void *memory) {
    DEBUG_BREAK_IF(imageDesc.image_type != CL_MEM_OBJECT_IMAGE3D);
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
    auto *state = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
    state->setSurfaceArray(false);
    state->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D);
}
// Rewrites an already-programmed surface state from a 3D view to a 2D-array
// view (only valid for images created as CL_MEM_OBJECT_IMAGE3D).
template <typename GfxFamily>
void ImageHw<GfxFamily>::transformImage3dTo2dArray(void *memory) {
    DEBUG_BREAK_IF(imageDesc.image_type != CL_MEM_OBJECT_IMAGE3D);
    using SURFACE_TYPE = typename RENDER_SURFACE_STATE::SURFACE_TYPE;
    auto *state = reinterpret_cast<RENDER_SURFACE_STATE *>(memory);
    state->setSurfaceArray(true);
    state->setSurfaceType(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D);
}
// Generic no-op for the combined MCS+CCS aux path; families that support it
// specialize this (see the TGLLP+ .inl, which sets AUX_MCS_LCE).
template <typename GfxFamily>
void ImageHw<GfxFamily>::setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) {
}
} // namespace NEO

View File

@@ -0,0 +1,13 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// Explicitly instantiate this family's image implementation and register its
// factory entry in the per-core image factory table.
template class ImageHw<Family>;

template <>
void populateFactoryTable<ImageHw<Family>>() {
    extern ImageFuncs imageFactory[IGFX_MAX_CORE];
    imageFactory[gfxCore].createImageFunction = ImageHw<Family>::create;
}

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
// TGLLP+ supports the combined MCS+CCS (lossless compression) aux mode;
// caller (setAuxParamsForMultisamples) programs the remaining aux fields.
template <>
void ImageHw<Family>::setAuxParamsForMCSCCS(RENDER_SURFACE_STATE *surfaceState, Gmm *gmm) {
    surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_MCS_LCE);
}
// TGLLP+: mirror the gmm's depth-resource flag into the surface state so the
// HW treats depth images as depth/stencil resources.
template <>
void ImageHw<Family>::appendSurfaceStateDepthParams(RENDER_SURFACE_STATE *surfaceState) {
    auto *gmm = this->graphicsAllocation->getDefaultGmm();
    if (gmm == nullptr) {
        return;
    }
    surfaceState->setDepthStencilResource(gmm->gmmResourceInfo->getResourceFlags()->Gpu.Depth);
}

View File

@@ -0,0 +1,74 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/map_operations_handler.h"
#include "core/helpers/ptr_math.h"
using namespace NEO;
// Returns the number of currently registered map operations (thread-safe).
size_t MapOperationsHandler::size() const {
    std::lock_guard<std::mutex> lock(mtx);
    return mappedPointers.size();
}
bool MapOperationsHandler::add(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel) {
std::lock_guard<std::mutex> lock(mtx);
MapInfo mapInfo(ptr, ptrLength, size, offset, mipLevel);
mapInfo.readOnly = (mapFlags == CL_MAP_READ);
if (isOverlapping(mapInfo)) {
return false;
}
mappedPointers.push_back(mapInfo);
return true;
}
// Returns true when inputMapInfo's host range conflicts with an already
// registered mapping. Read-only requests never conflict. Caller must hold mtx.
bool MapOperationsHandler::isOverlapping(MapInfo &inputMapInfo) {
    if (inputMapInfo.readOnly) {
        return false;
    }
    auto inputStartPtr = inputMapInfo.ptr;
    auto inputEndPtr = ptrOffset(inputStartPtr, inputMapInfo.ptrLength);

    for (auto &mapInfo : mappedPointers) {
        auto mappedStartPtr = mapInfo.ptr;
        auto mappedEndPtr = ptrOffset(mappedStartPtr, mapInfo.ptrLength);

        // Requested ptr starts before or inside existing ptr range and overlapping end
        // NOTE(review): with half-open [start, end) ranges, '>=' also rejects a
        // request whose end merely touches an existing start (inputEndPtr ==
        // mappedStartPtr). Presumably deliberately conservative — confirm
        // before tightening to '>'.
        if (inputStartPtr < mappedEndPtr && inputEndPtr >= mappedStartPtr) {
            return true;
        }
    }
    return false;
}
// Looks up the registered mapping that starts exactly at mappedPtr; on
// success copies it into outMapInfo and returns true (thread-safe).
bool MapOperationsHandler::find(void *mappedPtr, MapInfo &outMapInfo) {
    std::lock_guard<std::mutex> guard(mtx);

    for (auto it = mappedPointers.begin(); it != mappedPointers.end(); ++it) {
        if (it->ptr != mappedPtr) {
            continue;
        }
        outMapInfo = *it;
        return true;
    }
    return false;
}
// Unregisters the mapping that starts exactly at mappedPtr; no-op if absent.
// Uses the swap-and-pop idiom: O(1) erase, but element order is not preserved
// (order is irrelevant here — lookups scan the whole vector).
void MapOperationsHandler::remove(void *mappedPtr) {
    std::lock_guard<std::mutex> lock(mtx);

    auto endIter = mappedPointers.end();
    for (auto it = mappedPointers.begin(); it != endIter; it++) {
        if (it->ptr == mappedPtr) {
            std::iter_swap(it, mappedPointers.end() - 1);
            mappedPointers.pop_back();
            break;
        }
    }
}

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "helpers/properties_helper.h"
#include <mutex>
#include <vector>
namespace NEO {
// Thread-safe registry of the active clEnqueueMap* regions of one mem object.
// Tracks mapped host-pointer ranges so overlapping writable mappings can be
// rejected and clEnqueueUnmapMemObject can find its mapping again.
class MapOperationsHandler {
  public:
    virtual ~MapOperationsHandler() = default;

    // Registers a mapped range; returns false when a non-read-only mapping
    // overlaps an already registered one.
    bool add(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel);
    // Unregisters the mapping that starts at mappedPtr (no-op if absent).
    void remove(void *mappedPtr);
    // Copies the mapping starting at mappedPtr into outMapInfo; false if absent.
    bool find(void *mappedPtr, MapInfo &outMapInfo);
    // Number of currently registered mappings.
    size_t size() const;

  protected:
    // Overlap test against all registered writable-relevant ranges; caller holds mtx.
    bool isOverlapping(MapInfo &inputMapInfo);
    std::vector<MapInfo> mappedPointers;
    mutable std::mutex mtx; // guards mappedPointers; mutable so size() can stay const
};
} // namespace NEO

View File

@@ -0,0 +1,363 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/mem_obj.h"
#include "core/command_stream/command_stream_receiver.h"
#include "core/gmm_helper/gmm.h"
#include "core/gmm_helper/resource_info.h"
#include "core/helpers/aligned_memory.h"
#include "core/helpers/bit_helpers.h"
#include "core/helpers/get_info.h"
#include "core/memory_manager/deferred_deleter.h"
#include "core/memory_manager/internal_allocation_storage.h"
#include "core/memory_manager/memory_manager.h"
#include "core/os_interface/os_context.h"
#include "command_queue/command_queue.h"
#include "context/context.h"
#include "device/cl_device.h"
#include "helpers/get_info_status_mapper.h"
#include <algorithm>
namespace NEO {
// Base constructor shared by Buffer/Image/Pipe subclasses. Stores the CL-level
// properties and the (possibly null) graphics allocation; when a context is
// supplied it is pinned for the lifetime of this object (released in ~MemObj).
MemObj::MemObj(Context *context,
               cl_mem_object_type memObjectType,
               const MemoryPropertiesFlags &memoryProperties,
               cl_mem_flags flags,
               cl_mem_flags_intel flagsIntel,
               size_t size,
               void *memoryStorage,
               void *hostPtr,
               GraphicsAllocation *gfxAllocation,
               bool zeroCopy,
               bool isHostPtrSVM,
               bool isObjectRedescribed)
    : context(context), memObjectType(memObjectType), memoryProperties(memoryProperties), flags(flags), flagsIntel(flagsIntel), size(size),
      memoryStorage(memoryStorage), hostPtr(hostPtr),
      isZeroCopy(zeroCopy), isHostPtrSVM(isHostPtrSVM), isObjectRedescribed(isObjectRedescribed),
      graphicsAllocation(gfxAllocation) {
    if (context) {
        // Keep the context alive while this mem object exists.
        context->incRefInternal();
        memoryManager = context->getMemoryManager();
        // NOTE(review): takes device 0's execution environment — assumes all
        // devices in the context share it; confirm for multi-device contexts.
        executionEnvironment = context->getDevice(0)->getExecutionEnvironment();
    }
}
// Tears down the mem object: optionally waits for the GPU to finish using the
// allocation, releases owned allocations (main, map, MCS), then fires the
// clSetMemObjectDestructorCallback callbacks in reverse registration order
// and finally drops the context reference taken in the constructor.
MemObj::~MemObj() {
    // A wait is required whenever the host could still observe the memory:
    // an allocated map pointer, live CPU mappings, or user destructor callbacks.
    bool needWait = false;

    if (allocatedMapPtr != nullptr) {
        needWait = true;
    }

    if (mapOperationsHandler.size() > 0 && !getCpuAddressForMapping()) {
        needWait = true;
    }

    if (!destructorCallbacks.empty()) {
        needWait = true;
    }

    if (memoryManager && !isObjectRedescribed) {
        if (peekSharingHandler()) {
            peekSharingHandler()->releaseReusedGraphicsAllocation();
        }

        // Destroy the main allocation only when this object owns it: not a
        // sub-object (associatedMemObject), not SVM-backed, and not shared
        // via the sharing handler's reuse mechanism (peekReuseCount() == 0).
        if (graphicsAllocation && !associatedMemObject && !isHostPtrSVM && graphicsAllocation->peekReuseCount() == 0) {
            memoryManager->removeAllocationFromHostPtrManager(graphicsAllocation);
            bool doAsyncDestructions = DebugManager.flags.EnableAsyncDestroyAllocations.get();
            if (!doAsyncDestructions) {
                needWait = true;
            }
            if (needWait && graphicsAllocation->isUsed()) {
                // Block until all engines using this allocation have completed.
                memoryManager->waitForEnginesCompletion(*graphicsAllocation);
            }
            destroyGraphicsAllocation(graphicsAllocation, doAsyncDestructions);
            graphicsAllocation = nullptr;
        }
        if (!associatedMemObject) {
            releaseMapAllocation();
            releaseAllocatedMapPtr();
        }
        if (mcsAllocation) {
            destroyGraphicsAllocation(mcsAllocation, false);
        }
        if (associatedMemObject) {
            // A sub-object only destroys its allocation when it is distinct
            // from the parent's (e.g. a redescribed copy), then unpins the parent.
            if (associatedMemObject->getGraphicsAllocation() != this->getGraphicsAllocation()) {
                destroyGraphicsAllocation(graphicsAllocation, false);
            }
            associatedMemObject->decRefInternal();
        }
    }
    if (!destructorCallbacks.empty()) {
        // Per the CL spec, callbacks fire in reverse order of registration.
        for (auto iter = destructorCallbacks.rbegin(); iter != destructorCallbacks.rend(); iter++) {
            (*iter)->invoke(this);
            delete *iter;
        }
    }

    if (context) {
        context->decRefInternal();
    }
}
// Invokes the user callback registered via clSetMemObjectDestructorCallback
// with the user-supplied data pointer.
void MemObj::DestructorCallback::invoke(cl_mem memObj) {
    this->funcNotify(memObj, userData);
}
// Implements clGetMemObjectInfo: selects the source value for paramName,
// then lets ::getInfo copy it into the caller's buffer with the standard
// CL size/return-code semantics. Unknown params fall through to the
// OS-specific handler.
cl_int MemObj::getMemObjectInfo(cl_mem_info paramName,
                                size_t paramValueSize,
                                void *paramValue,
                                size_t *paramValueSizeRet) {
    cl_int retVal;
    size_t srcParamSize = 0;
    void *srcParam = nullptr;
    cl_bool usesSVMPointer;
    cl_uint refCnt = 0;
    cl_uint mapCount = 0;
    cl_mem clAssociatedMemObject = static_cast<cl_mem>(this->associatedMemObject);
    cl_context ctx = nullptr;
    uint64_t internalHandle = 0llu;

    switch (paramName) {
    case CL_MEM_TYPE:
        srcParamSize = sizeof(memObjectType);
        srcParam = &memObjectType;
        break;

    case CL_MEM_FLAGS:
        srcParamSize = sizeof(flags);
        srcParam = &flags;
        break;

    case CL_MEM_SIZE:
        srcParamSize = sizeof(size);
        srcParam = &size;
        break;

    case CL_MEM_HOST_PTR:
        srcParamSize = sizeof(hostPtr);
        srcParam = &hostPtr;
        break;

    case CL_MEM_CONTEXT:
        srcParamSize = sizeof(context);
        ctx = context;
        srcParam = &ctx;
        break;

    case CL_MEM_USES_SVM_POINTER:
        // True only for USE_HOST_PTR objects whose host pointer is SVM memory.
        usesSVMPointer = isHostPtrSVM && isValueSet(flags, CL_MEM_USE_HOST_PTR);
        srcParamSize = sizeof(cl_bool);
        srcParam = &usesSVMPointer;
        break;

    case CL_MEM_OFFSET:
        srcParamSize = sizeof(offset);
        srcParam = &offset;
        break;

    case CL_MEM_ASSOCIATED_MEMOBJECT:
        srcParamSize = sizeof(clAssociatedMemObject);
        srcParam = &clAssociatedMemObject;
        break;

    case CL_MEM_MAP_COUNT:
        srcParamSize = sizeof(mapCount);
        mapCount = static_cast<cl_uint>(mapOperationsHandler.size());
        srcParam = &mapCount;
        break;

    case CL_MEM_REFERENCE_COUNT:
        refCnt = static_cast<cl_uint>(this->getReference());
        srcParamSize = sizeof(refCnt);
        srcParam = &refCnt;
        break;

    case CL_MEM_ALLOCATION_HANDLE_INTEL:
        // NOTE(review): dereferences getGraphicsAllocation() unchecked —
        // presumably every queryable mem object has an allocation here;
        // confirm for host-ptr-only paths.
        internalHandle = this->getGraphicsAllocation()->peekInternalHandle(this->memoryManager);
        srcParamSize = sizeof(internalHandle);
        srcParam = &internalHandle;
        break;

    default:
        getOsSpecificMemObjectInfo(paramName, &srcParamSize, &srcParam);
        break;
    }

    retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, srcParam, srcParamSize));

    if (paramValueSizeRet) {
        *paramValueSizeRet = srcParamSize;
    }

    return retVal;
}
// Registers a user callback to be invoked from ~MemObj.
// Registration is serialized; callbacks run in reverse registration order.
cl_int MemObj::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                                     void *userData) {
    auto callback = new DestructorCallback(funcNotify, userData);
    std::lock_guard<std::mutex> listGuard(mtx);
    destructorCallbacks.push_back(callback);
    return CL_SUCCESS;
}
// Returns the driver-side storage pointer backing this object.
void *MemObj::getCpuAddress() const {
    return memoryStorage;
}
// Returns the host pointer supplied at creation time.
void *MemObj::getHostPtr() const {
    return hostPtr;
}
// Returns the size in bytes of this memory object.
size_t MemObj::getSize() const {
    return size;
}
// Records the system-memory pointer used for map staging (owned by this object).
void MemObj::setAllocatedMapPtr(void *allocatedMapPtr) {
    this->allocatedMapPtr = allocatedMapPtr;
}
// True when host and GPU share the same storage (no transfer needed on map).
bool MemObj::isMemObjZeroCopy() const {
    return isZeroCopy;
}
// True when the user host pointer is SVM memory.
bool MemObj::isMemObjWithHostPtrSVM() const {
    return isHostPtrSVM;
}
bool MemObj::isMemObjUncacheable() const {
    return isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
}
bool MemObj::isMemObjUncacheableForSurfaceState() const {
    return isAnyBitSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE);
}
// Returns the primary backing allocation (may be nullptr after destruction paths).
GraphicsAllocation *MemObj::getGraphicsAllocation() const {
    return graphicsAllocation;
}
// Replaces the backing allocation with a new one, destroying the old
// allocation unless a sharing handler still holds it for reuse.
void MemObj::resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation) {
    // Serialize against concurrent users of this mem object while swapping.
    TakeOwnershipWrapper<MemObj> lock(*this);
    if (graphicsAllocation != nullptr && (peekSharingHandler() == nullptr || graphicsAllocation->peekReuseCount() == 0)) {
        memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(graphicsAllocation);
    }
    graphicsAllocation = newGraphicsAllocation;
}
// Host reads are disallowed for host-write-only / host-no-access objects.
bool MemObj::readMemObjFlagsInvalid() {
    return isValueSet(flags, CL_MEM_HOST_WRITE_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
}
// Host writes are disallowed for host-read-only / host-no-access objects.
bool MemObj::writeMemObjFlagsInvalid() {
    return isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
}
// Validates the requested map access against the host-access creation flags.
bool MemObj::mapMemObjFlagsInvalid(cl_map_flags mapFlags) {
    return (writeMemObjFlagsInvalid() && (mapFlags & CL_MAP_WRITE)) ||
           (readMemObjFlagsInvalid() && (mapFlags & CL_MAP_READ));
}
// Records the minimum number of bytes the host pointer must cover.
void MemObj::setHostPtrMinSize(size_t size) {
    hostPtrMinSize = size;
}
// Returns the CPU pointer a map operation should hand back to the user:
// USE_HOST_PTR objects are mapped through the user pointer, all others
// through the driver-side storage.
void *MemObj::getCpuAddressForMapping() {
    return isValueSet(flags, CL_MEM_USE_HOST_PTR) ? this->hostPtr
                                                  : this->memoryStorage;
}
// Returns the CPU pointer to use as source/destination of a memory transfer.
// The user host pointer is used only for zero-copy USE_HOST_PTR objects;
// otherwise the driver-side storage is the transfer target.
void *MemObj::getCpuAddressForMemoryTransfer() {
    const bool transferViaHostPtr = isValueSet(flags, CL_MEM_USE_HOST_PTR) && this->isMemObjZeroCopy();
    return transferViaHostPtr ? this->hostPtr : this->memoryStorage;
}
// Frees the lazily allocated map staging buffer, if any, and clears the pointer.
void MemObj::releaseAllocatedMapPtr() {
    if (allocatedMapPtr != nullptr) {
        // USE_HOST_PTR objects never own a staging buffer.
        DEBUG_BREAK_IF(isValueSet(flags, CL_MEM_USE_HOST_PTR));
        memoryManager->freeSystemMemory(allocatedMapPtr);
    }
    allocatedMapPtr = nullptr;
}
// Destroys the MAP_ALLOCATION wrapper synchronously; SVM-backed objects do not
// own their map allocation and must not free it.
void MemObj::releaseMapAllocation() {
    if (mapAllocation && !isHostPtrSVM) {
        destroyGraphicsAllocation(mapAllocation, false);
    }
}
// Releases a graphics allocation either immediately (sync) or deferred until
// the GPU is done with it (async).
void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy) {
    if (!asyncDestroy) {
        memoryManager->freeGraphicsMemory(allocation);
        return;
    }
    memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation);
}
// A transfer can be skipped only when source and destination resolve to the
// same address AND the command is a plain buffer/image read or write.
bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) {
    const bool sameStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offsetInMemObject) ==
                             ptrOffset(hostPtr, offsetInHostPtr);
    const bool plainReadWriteCommand = (cmdType == CL_COMMAND_WRITE_BUFFER || cmdType == CL_COMMAND_READ_BUFFER ||
                                        cmdType == CL_COMMAND_WRITE_BUFFER_RECT || cmdType == CL_COMMAND_READ_BUFFER_RECT ||
                                        cmdType == CL_COMMAND_WRITE_IMAGE || cmdType == CL_COMMAND_READ_IMAGE);
    return !(sameStorage && plainReadWriteCommand);
}
// Returns the base CPU pointer map operations are served from, lazily creating
// a staging buffer when the object has neither a user host ptr nor an existing
// map allocation.
void *MemObj::getBasePtrForMap(uint32_t rootDeviceIndex) {
    // Sub-objects delegate to their parent so all maps share one staging area.
    if (associatedMemObject) {
        return associatedMemObject->getBasePtrForMap(rootDeviceIndex);
    }
    // USE_HOST_PTR objects are mapped directly through the user pointer.
    if (getMemoryPropertiesFlags() & CL_MEM_USE_HOST_PTR) {
        return getHostPtr();
    } else {
        // Lock the object so the staging buffer is created at most once.
        TakeOwnershipWrapper<MemObj> memObjOwnership(*this);
        if (getMapAllocation()) {
            return getMapAllocation()->getUnderlyingBuffer();
        } else {
            // Allocate a page-aligned system-memory buffer and wrap it in a
            // MAP_ALLOCATION so GPU transfers can target it.
            auto memory = memoryManager->allocateSystemMemory(getSize(), MemoryConstants::pageSize);
            setAllocatedMapPtr(memory);
            AllocationProperties properties{rootDeviceIndex, false, getSize(), GraphicsAllocation::AllocationType::MAP_ALLOCATION, false};
            auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, memory);
            setMapAllocation(allocation);
            return getAllocatedMapPtr();
        }
    }
}
// Records a new mapped range; bookkeeping is delegated to the operations handler.
bool MemObj::addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags,
                          MemObjSizeArray &size, MemObjOffsetArray &offset,
                          uint32_t mipLevel) {
    return mapOperationsHandler.add(ptr, ptrLength, mapFlags, size, offset, mipLevel);
}
// True when the backing surface uses a tiled layout (per its GMM resource info).
bool MemObj::isTiledAllocation() const {
    const auto defaultGmm = graphicsAllocation->getDefaultGmm();
    if (defaultGmm == nullptr) {
        return false;
    }
    return defaultGmm->gmmResourceInfo->getTileModeSurfaceState() != 0;
}
// A map may be served directly on the CPU only for linear, non-shared,
// non-mipmapped, non-compressed allocations living in system memory
// (and only when zero-copy is not disabled via debug flag).
// Guard clauses below preserve the original short-circuit evaluation order.
bool MemObj::mappingOnCpuAllowed() const {
    if (isTiledAllocation() || peekSharingHandler() || isMipMapped(this)) {
        return false;
    }
    if (DebugManager.flags.DisableZeroCopyForBuffers.get()) {
        return false;
    }
    const auto defaultGmm = graphicsAllocation->getDefaultGmm();
    if (defaultGmm && defaultGmm->isRenderCompressed) {
        return false;
    }
    return MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool());
}
} // namespace NEO

View File

@@ -0,0 +1,171 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/debug_settings/debug_settings_manager.h"
#include "api/cl_types.h"
#include "extensions/public/cl_ext_private.h"
#include "helpers/base_object.h"
#include "helpers/mipmap.h"
#include "mem_obj/map_operations_handler.h"
#include "memory_properties_flags.h"
#include "sharings/sharing.h"
#include <atomic>
#include <cstdint>
#include <vector>
namespace NEO {
class ExecutionEnvironment;
class GraphicsAllocation;
struct KernelInfo;
class MemoryManager;
class Context;
// Maps the public cl_mem handle type to its driver-side implementation class.
template <>
struct OpenCLObjectMapper<_cl_mem> {
    using DerivedType = class MemObj;
};
// Driver-side implementation of cl_mem. Wraps the backing GraphicsAllocation,
// tracks mapped ranges, optional sharing handlers, and the owning Context.
class MemObj : public BaseObject<_cl_mem> {
  public:
    constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFF00LL;
    constexpr static cl_ulong objectMagic = 0xAB2212340CACDD00LL;
    MemObj(Context *context,
           cl_mem_object_type memObjectType,
           const MemoryPropertiesFlags &memoryProperties,
           cl_mem_flags flags,
           cl_mem_flags_intel flagsIntel,
           size_t size,
           void *memoryStorage,
           void *hostPtr,
           GraphicsAllocation *gfxAllocation,
           bool zeroCopy,
           bool isHostPtrSVM,
           bool isObjectRedescribed); // fixed typo: was "isObjectRedescrbied"
    ~MemObj() override;
    // Implements clGetMemObjectInfo.
    cl_int getMemObjectInfo(cl_mem_info paramName,
                            size_t paramValueSize,
                            void *paramValue,
                            size_t *paramValueSizeRet);
    // Registers a callback invoked (in reverse registration order) at destruction.
    cl_int setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                                 void *userData);
    void *getCpuAddress() const;
    void *getHostPtr() const;
    bool getIsObjectRedescribed() const { return isObjectRedescribed; }
    size_t getSize() const;
    // Map-range bookkeeping, delegated to mapOperationsHandler.
    bool addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags, MemObjSizeArray &size, MemObjOffsetArray &offset, uint32_t mipLevel);
    bool findMappedPtr(void *mappedPtr, MapInfo &outMapInfo) { return mapOperationsHandler.find(mappedPtr, outMapInfo); }
    void removeMappedPtr(void *mappedPtr) { mapOperationsHandler.remove(mappedPtr); }
    void *getBasePtrForMap(uint32_t rootDeviceIndex);
    MOCKABLE_VIRTUAL void setAllocatedMapPtr(void *allocatedMapPtr);
    void *getAllocatedMapPtr() const { return allocatedMapPtr; }
    void setHostPtrMinSize(size_t size);
    void releaseAllocatedMapPtr();
    void releaseMapAllocation();
    bool isMemObjZeroCopy() const;
    bool isMemObjWithHostPtrSVM() const;
    bool isMemObjUncacheable() const;
    bool isMemObjUncacheableForSurfaceState() const;
    // Overridden by subclasses that support host-ptr data transfers; the base
    // class treats a call as an internal error.
    virtual void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) { UNRECOVERABLE_IF(true); }
    virtual void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) { UNRECOVERABLE_IF(true); }
    GraphicsAllocation *getGraphicsAllocation() const;
    void resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation);
    GraphicsAllocation *getMcsAllocation() { return mcsAllocation; }
    void setMcsAllocation(GraphicsAllocation *alloc) { mcsAllocation = alloc; }
    bool readMemObjFlagsInvalid();
    bool writeMemObjFlagsInvalid();
    bool mapMemObjFlagsInvalid(cl_map_flags mapFlags);
    MOCKABLE_VIRTUAL bool isTiledAllocation() const;
    void *getCpuAddressForMapping();
    void *getCpuAddressForMemoryTransfer();
    std::shared_ptr<SharingHandler> &getSharingHandler() { return sharingHandler; }
    SharingHandler *peekSharingHandler() const { return sharingHandler.get(); }
    void setSharingHandler(SharingHandler *sharingHandler) { this->sharingHandler.reset(sharingHandler); }
    void setParentSharingHandler(std::shared_ptr<SharingHandler> &handler) { sharingHandler = handler; }
    unsigned int acquireCount = 0;
    Context *getContext() const { return context; }
    void destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy);
    bool checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *ptr, cl_command_type cmdType);
    bool mappingOnCpuAllowed() const;
    virtual size_t calculateOffsetForMapping(const MemObjOffsetArray &offset) const { return offset[0]; }
    size_t calculateMappedPtrLength(const MemObjSizeArray &size) const { return calculateOffsetForMapping(size); }
    cl_mem_object_type peekClMemObjType() const { return memObjectType; }
    size_t getOffset() const { return offset; }
    MemoryManager *getMemoryManager() const {
        return memoryManager;
    }
    void setMapAllocation(GraphicsAllocation *allocation) {
        mapAllocation = allocation;
    }
    // Sub-objects resolve the map allocation through their parent.
    GraphicsAllocation *getMapAllocation() const {
        if (associatedMemObject) {
            return associatedMemObject->getMapAllocation();
        }
        return mapAllocation;
    }
    const cl_mem_flags &getMemoryPropertiesFlags() const { return flags; }
    const cl_mem_flags &getMemoryPropertiesFlagsIntel() const { return flagsIntel; }

  protected:
    void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);
    Context *context;
    cl_mem_object_type memObjectType;
    MemoryPropertiesFlags memoryProperties;
    cl_mem_flags flags = 0;
    cl_mem_flags_intel flagsIntel = 0;
    size_t size;
    size_t hostPtrMinSize = 0;
    void *memoryStorage; // driver-side storage used for non-USE_HOST_PTR access
    void *hostPtr;       // user pointer supplied at creation time
    void *allocatedMapPtr = nullptr;
    MapOperationsHandler mapOperationsHandler;
    size_t offset = 0;                     // sub-buffer offset into the parent
    MemObj *associatedMemObject = nullptr; // parent object for sub-objects
    cl_uint refCount = 0;
    ExecutionEnvironment *executionEnvironment = nullptr;
    bool isZeroCopy;
    bool isHostPtrSVM;
    bool isObjectRedescribed;
    MemoryManager *memoryManager = nullptr;
    GraphicsAllocation *graphicsAllocation;
    GraphicsAllocation *mcsAllocation = nullptr;
    GraphicsAllocation *mapAllocation = nullptr;
    std::shared_ptr<SharingHandler> sharingHandler;
    // Wraps a user-registered clSetMemObjectDestructorCallback entry.
    class DestructorCallback {
      public:
        DestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                           void *userData)
            : funcNotify(funcNotify), userData(userData){};
        void invoke(cl_mem memObj);

      private:
        void(CL_CALLBACK *funcNotify)(cl_mem, void *);
        void *userData;
    };
    std::vector<DestructorCallback *> destructorCallbacks;
};
} // namespace NEO

View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/mem_obj_helper_common.inl"
#include "memory_properties_flags.h"
namespace NEO {
// Default (open-source branch) policy: use compression whenever the allocation
// supports it and the caller prefers it; properties/context are unused here.
bool MemObjHelper::isSuitableForRenderCompression(bool renderCompressed, const MemoryPropertiesFlags &properties, Context &context, bool preferCompression) {
    return renderCompressed && preferCompression;
}
// No branch-specific flag restrictions in the default implementation.
bool MemObjHelper::validateExtraMemoryProperties(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel) {
    return true;
}
// No branch-specific valid flags beyond the common OpenCL set.
const uint64_t MemObjHelper::extraFlags = 0;
const uint64_t MemObjHelper::extraFlagsIntel = 0;
} // namespace NEO

View File

@@ -0,0 +1,42 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "core/helpers/bit_helpers.h"
#include "core/memory_manager/memory_manager.h"
#include "core/memory_manager/unified_memory_manager.h"
#include "CL/cl.h"
#include "extensions/public/cl_ext_private.h"
#include "helpers/mem_properties_parser_helper.h"
#include "mem_obj/mem_obj.h"
#include "memory_properties_flags.h"
namespace NEO {
// Static helpers for validating cl_mem flags and building allocation properties.
class MemObjHelper {
  public:
    // Branch-specific extensions to the valid-flag masks (0 in the default branch).
    static const uint64_t extraFlags;
    static const uint64_t extraFlagsIntel;
    // Flag masks accepted for any mem object, plus buffer/image-specific variants.
    static const uint64_t commonFlags;
    static const uint64_t commonFlagsIntel;
    static const uint64_t validFlagsForBuffer;
    static const uint64_t validFlagsForBufferIntel;
    static const uint64_t validFlagsForImage;
    static const uint64_t validFlagsForImageIntel;
    // Reject mutually exclusive flag combinations for buffer creation.
    static bool validateMemoryPropertiesForBuffer(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel);
    // Reject invalid combinations for image creation, including flags
    // incompatible with the parent object (for images created from a parent).
    static bool validateMemoryPropertiesForImage(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem parent);
    static AllocationProperties getAllocationPropertiesWithImageInfo(uint32_t rootDeviceIndex, ImageInfo &imgInfo, bool allocateMemory, const MemoryPropertiesFlags &memoryProperties);
    static bool checkMemFlagsForSubBuffer(cl_mem_flags flags);
    // Translate cl_mem flags into SVM allocation properties.
    static SVMAllocsManager::SvmAllocationProperties getSvmAllocationProperties(cl_mem_flags flags);
    static bool isSuitableForRenderCompression(bool renderCompressed, const MemoryPropertiesFlags &properties, Context &context, bool preferCompression);

  protected:
    static bool validateExtraMemoryProperties(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel);
};
} // namespace NEO

View File

@@ -0,0 +1,107 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/mem_obj_helper.h"
namespace NEO {
bool MemObjHelper::validateMemoryPropertiesForBuffer(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel) {
/* Check all the invalid flags combination. */
if ((isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY)) ||
(isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY)) ||
(isValueSet(flags, CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) ||
(isValueSet(flags, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR)) ||
(isValueSet(flags, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) ||
(isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)) ||
(isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY)) ||
(isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS))) {
return false;
}
return validateExtraMemoryProperties(memoryProperties, flags, flagsIntel);
}
// Validates image creation flags, including compatibility with the parent
// object when the image is created from an existing mem object.
bool MemObjHelper::validateMemoryPropertiesForImage(const MemoryPropertiesFlags &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, cl_mem parent) {
    /* Check all the invalid flags combination. */
    // CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL bypasses the exclusivity checks.
    if ((!isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) &&
        (isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY) ||
         isValueSet(flags, CL_MEM_READ_WRITE | CL_MEM_READ_ONLY) ||
         isValueSet(flags, CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY) ||
         isValueSet(flags, CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_HOST_PTR) ||
         isValueSet(flags, CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR) ||
         isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY) ||
         isValueSet(flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS) ||
         isValueSet(flags, CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS) ||
         isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_WRITE) ||
         isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_WRITE_ONLY) ||
         isValueSet(flags, CL_MEM_NO_ACCESS_INTEL | CL_MEM_READ_ONLY))) {
        return false;
    }
    auto parentMemObj = castToObject<MemObj>(parent);
    if (parentMemObj != nullptr && flags) {
        auto parentFlags = parentMemObj->getMemoryPropertiesFlags();
        /* Check whether flags are compatible with parent. */
        // Host-ptr flags are never valid on a child image, and the child must
        // not widen the device/host access granted by the parent (unless
        // either side carries the unrestricted-access flag).
        if (isValueSet(flags, CL_MEM_ALLOC_HOST_PTR) ||
            isValueSet(flags, CL_MEM_COPY_HOST_PTR) ||
            isValueSet(flags, CL_MEM_USE_HOST_PTR) ||
            ((!isValueSet(parentFlags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) &&
             (!isValueSet(flags, CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) &&
             ((isValueSet(parentFlags, CL_MEM_WRITE_ONLY) && isValueSet(flags, CL_MEM_READ_WRITE)) ||
              (isValueSet(parentFlags, CL_MEM_WRITE_ONLY) && isValueSet(flags, CL_MEM_READ_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_READ_ONLY) && isValueSet(flags, CL_MEM_READ_WRITE)) ||
              (isValueSet(parentFlags, CL_MEM_READ_ONLY) && isValueSet(flags, CL_MEM_WRITE_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_READ_WRITE)) ||
              (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_WRITE_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_NO_ACCESS_INTEL) && isValueSet(flags, CL_MEM_READ_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_HOST_NO_ACCESS) && isValueSet(flags, CL_MEM_HOST_WRITE_ONLY)) ||
              (isValueSet(parentFlags, CL_MEM_HOST_NO_ACCESS) && isValueSet(flags, CL_MEM_HOST_READ_ONLY))))) {
            return false;
        }
    }
    return validateExtraMemoryProperties(memoryProperties, flags, flagsIntel);
}
// Builds IMAGE allocation properties from image info and fills in the
// memory-property-derived policies (e.g. caching) via the parser helper.
AllocationProperties MemObjHelper::getAllocationPropertiesWithImageInfo(uint32_t rootDeviceIndex, ImageInfo &imgInfo, bool allocateMemory, const MemoryPropertiesFlags &memoryProperties) {
    AllocationProperties allocationProperties{rootDeviceIndex, allocateMemory, imgInfo, GraphicsAllocation::AllocationType::IMAGE};
    MemoryPropertiesParser::fillPoliciesInProperties(allocationProperties, memoryProperties);
    return allocationProperties;
}
// Sub-buffers accept only access-qualifier flags; any other bit is invalid.
bool MemObjHelper::checkMemFlagsForSubBuffer(cl_mem_flags flags) {
    constexpr cl_mem_flags allValidFlags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
                                           CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
    return isFieldValid(flags, allValidFlags);
}
// Translates cl_mem SVM flags into the allocator's property struct.
SVMAllocsManager::SvmAllocationProperties MemObjHelper::getSvmAllocationProperties(cl_mem_flags flags) {
    SVMAllocsManager::SvmAllocationProperties svmProperties;
    svmProperties.coherent = isValueSet(flags, CL_MEM_SVM_FINE_GRAIN_BUFFER);
    // Host cannot read the pointer when it is write-only or inaccessible from host.
    svmProperties.hostPtrReadOnly = isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
    svmProperties.readOnly = isValueSet(flags, CL_MEM_READ_ONLY);
    return svmProperties;
}
// Flags accepted for any mem object: branch extras plus the core OpenCL
// access-qualifier and host-ptr flags.
const uint64_t MemObjHelper::commonFlags = extraFlags | CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR |
                                           CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
// Intel-specific flags accepted for any mem object.
const uint64_t MemObjHelper::commonFlagsIntel = extraFlagsIntel | CL_MEM_LOCALLY_UNCACHED_RESOURCE |
                                                CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_48BIT_RESOURCE_INTEL;
// Buffers additionally allow the unrestricted-size extension flag.
const uint64_t MemObjHelper::validFlagsForBuffer = commonFlags | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL;
const uint64_t MemObjHelper::validFlagsForBufferIntel = commonFlagsIntel | CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL;
// Images additionally allow no-access, unrestricted-access and linear-storage flags.
const uint64_t MemObjHelper::validFlagsForImage = commonFlags | CL_MEM_NO_ACCESS_INTEL | CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL | CL_MEM_FORCE_LINEAR_STORAGE_INTEL;
const uint64_t MemObjHelper::validFlagsForImageIntel = commonFlagsIntel;
} // namespace NEO

View File

@@ -0,0 +1,123 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "mem_obj/pipe.h"
#include "core/helpers/get_info.h"
#include "core/memory_manager/memory_manager.h"
#include "context/context.h"
#include "device/cl_device.h"
#include "helpers/get_info_status_mapper.h"
#include "helpers/memory_properties_flags_helpers.h"
#include "mem_obj/mem_obj_helper.h"
namespace NEO {
// Constructs a pipe mem object over an already-allocated surface.
// Total size covers the reserved control header plus (maxPackets + 1) packets
// (one extra slot for the ring-buffer bookkeeping).
Pipe::Pipe(Context *context,
           cl_mem_flags flags,
           cl_uint packetSize,
           cl_uint maxPackets,
           const cl_pipe_properties *properties,
           void *memoryStorage,
           GraphicsAllocation *gfxAllocation)
    : MemObj(context,
             CL_MEM_OBJECT_PIPE,
             MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0),
             flags,
             0,
             static_cast<size_t>(packetSize * (maxPackets + 1) + intelPipeHeaderReservedSpace),
             memoryStorage,
             nullptr,
             gfxAllocation,
             false,
             false,
             false),
      pipePacketSize(packetSize),
      pipeMaxPackets(maxPackets) {
    magic = objectMagic;
}
// Factory for clCreatePipe: allocates the backing surface, constructs the Pipe,
// and initializes the control header. Returns nullptr and sets errcodeRet to
// CL_OUT_OF_HOST_MEMORY on allocation failure.
Pipe *Pipe::create(Context *context,
                   cl_mem_flags flags,
                   cl_uint packetSize,
                   cl_uint maxPackets,
                   const cl_pipe_properties *properties,
                   cl_int &errcodeRet) {
    errcodeRet = CL_SUCCESS;
    MemoryManager *memoryManager = context->getMemoryManager();
    DEBUG_BREAK_IF(!memoryManager);
    MemoryPropertiesFlags memoryPropertiesFlags = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(flags, 0, 0);

    // Header plus (maxPackets + 1) packet slots for the ring buffer.
    auto surfaceSize = static_cast<size_t>(packetSize * (maxPackets + 1) + intelPipeHeaderReservedSpace);
    auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex();
    AllocationProperties allocProperties =
        MemoryPropertiesParser::getAllocationProperties(rootDeviceIndex, memoryPropertiesFlags, true, surfaceSize, GraphicsAllocation::AllocationType::PIPE, false);
    GraphicsAllocation *memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
    if (memory == nullptr) {
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        return nullptr;
    }

    auto pPipe = new (std::nothrow) Pipe(context, flags, packetSize, maxPackets, properties, memory->getUnderlyingBuffer(), memory);
    if (pPipe == nullptr) {
        memoryManager->freeGraphicsMemory(memory);
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        return nullptr;
    }

    // Initialize pipe_control_intel_t structure located at the beginning of the surface
    memset(memory->getUnderlyingBuffer(), 0, intelPipeHeaderReservedSpace);
    *reinterpret_cast<unsigned int *>(memory->getUnderlyingBuffer()) = maxPackets + 1;
    return pPipe;
}
// Implements clGetPipeInfo: returns packet size or max packet count.
// Unknown queries fall through with srcParam == nullptr, which ::getInfo
// converts to an error status.
cl_int Pipe::getPipeInfo(cl_image_info paramName,
                         size_t paramValueSize,
                         void *paramValue,
                         size_t *paramValueSizeRet) {
    cl_int retVal;
    size_t srcParamSize = 0;
    void *srcParam = nullptr;
    switch (paramName) {
    case CL_PIPE_PACKET_SIZE:
        srcParamSize = sizeof(cl_uint);
        srcParam = &(pipePacketSize);
        break;
    case CL_PIPE_MAX_PACKETS:
        srcParamSize = sizeof(cl_uint);
        srcParam = &(pipeMaxPackets);
        break;
    default:
        break;
    }
    retVal = changeGetInfoStatusToCLResultType(::getInfo(paramValue, paramValueSize, srcParam, srcParamSize));
    if (paramValueSizeRet) {
        *paramValueSizeRet = srcParamSize;
    }
    return retVal;
}
// Patches a kernel argument slot with the pipe surface's GPU address.
void Pipe::setPipeArg(void *memory, uint32_t patchSize) {
    patchWithRequiredSize(memory, patchSize, static_cast<uintptr_t>(getGraphicsAllocation()->getGpuAddressToPatch()));
}
Pipe::~Pipe() = default;
} // namespace NEO

View File

@@ -0,0 +1,46 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "mem_obj/buffer.h"
namespace NEO {
// Driver-side implementation of an OpenCL pipe (cl_mem with CL_MEM_OBJECT_PIPE).
class Pipe : public MemObj {
  public:
    // Bytes reserved at the start of the surface for the pipe control structure.
    static const size_t intelPipeHeaderReservedSpace = 128;
    static const cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
    static const cl_ulong objectMagic = MemObj::objectMagic | 0x03;
    // Factory used by clCreatePipe; sets errcodeRet and returns nullptr on failure.
    static Pipe *create(
        Context *context,
        cl_mem_flags flags,
        cl_uint packetSize,
        cl_uint maxPackets,
        const cl_pipe_properties *properties,
        cl_int &errcodeRet);
    ~Pipe() override;
    // Implements clGetPipeInfo.
    cl_int getPipeInfo(cl_image_info paramName,
                       size_t paramValueSize,
                       void *paramValue,
                       size_t *paramValueSizeRet);
    // Patches a kernel argument with the pipe surface GPU address.
    void setPipeArg(void *memory, uint32_t patchSize);

  protected:
    Pipe(Context *context,
         cl_mem_flags flags,
         cl_uint packetSize,
         cl_uint maxPackets,
         const cl_pipe_properties *properties,
         void *memoryStorage,
         GraphicsAllocation *gfxAllocation);
    cl_uint pipePacketSize;
    cl_uint pipeMaxPackets;
};
} // namespace NEO