mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Increase chunk alignment from 256 to 512. Restores performance in some workloads with pool enabled but lowers maximum possible number of buffers in pool from 256 to 128. MemObj size will keep the value passed to clCreateBuffer ie. will not be aligned up by chunk alignment. CL_MEM_SIZE will now return same value as with pool disabled. Related-To: NEO-7332 Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
458 lines · 17 KiB · C++
/*
|
|
* Copyright (C) 2018-2023 Intel Corporation
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*
|
|
*/
|
|
|
|
#include "opencl/source/mem_obj/mem_obj.h"
|
|
|
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
|
#include "shared/source/compiler_interface/compiler_cache.h"
|
|
#include "shared/source/gmm_helper/gmm.h"
|
|
#include "shared/source/gmm_helper/resource_info.h"
|
|
#include "shared/source/helpers/aligned_memory.h"
|
|
#include "shared/source/helpers/bit_helpers.h"
|
|
#include "shared/source/helpers/get_info.h"
|
|
#include "shared/source/memory_manager/allocation_properties.h"
|
|
#include "shared/source/memory_manager/deferred_deleter.h"
|
|
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
|
#include "shared/source/memory_manager/memory_manager.h"
|
|
#include "shared/source/os_interface/os_context.h"
|
|
|
|
#include "opencl/source/cl_device/cl_device.h"
|
|
#include "opencl/source/command_queue/command_queue.h"
|
|
#include "opencl/source/context/context.h"
|
|
#include "opencl/source/helpers/get_info_status_mapper.h"
|
|
#include "opencl/source/helpers/mipmap.h"
|
|
|
|
#include <algorithm>
|
|
|
|
namespace NEO {
|
|
|
|
// Constructs a mem object wrapper around already-created graphics allocation(s).
// Takes ownership of multiGraphicsAllocation (moved in). `mapAllocations` is
// constructed with (allocation count - 1), presumably the highest root-device
// index covered by the multi allocation.
// A null context is tolerated (the destructor early-outs on it as well); such
// objects take no context reference and leave memoryManager/executionEnvironment unset.
MemObj::MemObj(Context *context,
               cl_mem_object_type memObjectType,
               const MemoryProperties &memoryProperties,
               cl_mem_flags flags,
               cl_mem_flags_intel flagsIntel,
               size_t size,
               void *memoryStorage,
               void *hostPtr,
               MultiGraphicsAllocation &&multiGraphicsAllocation,
               bool zeroCopy,
               bool isHostPtrSVM,
               bool isObjectRedescribed)
    : context(context), memObjectType(memObjectType), memoryProperties(memoryProperties), flags(flags), flagsIntel(flagsIntel), size(size),
      memoryStorage(memoryStorage), hostPtr(hostPtr),
      isZeroCopy(zeroCopy), isHostPtrSVM(isHostPtrSVM), isObjectRedescribed(isObjectRedescribed),
      multiGraphicsAllocation(std::move(multiGraphicsAllocation)),
      mapAllocations(static_cast<uint32_t>(this->multiGraphicsAllocation.getGraphicsAllocations().size() - 1)) {
    if (context == nullptr) {
        return;
    }
    // Keep the context alive for the lifetime of this object; released in ~MemObj.
    context->incRefInternal();
    memoryManager = context->getMemoryManager();
    executionEnvironment = context->getDevice(0)->getExecutionEnvironment();
}
|
|
|
|
// Tears down the mem object: waits for outstanding GPU work where required,
// releases graphics/map allocations, fires user destructor callbacks, and
// drops the context reference taken in the constructor.
// NOTE: the statement order below is load-bearing (wait-before-free, callbacks
// after allocation teardown, context release last) — do not reorder.
MemObj::~MemObj() {
    // Constructor allowed a null context; nothing was acquired in that case.
    if (!context) {
        return;
    }

    // needWait accumulates reasons why we must synchronize with the GPU
    // before freeing resources.
    bool needWait = false;

    if (allocatedMapPtr != nullptr) {
        needWait = true;
    }

    // Active mappings without a CPU-side mapping address imply GPU-side
    // staging that must complete before teardown.
    if (auto mapOperationsHandler = getMapOperationsHandlerIfExists(); mapOperationsHandler != nullptr) {
        if (mapOperationsHandler->size() > 0 && !getCpuAddressForMapping()) {
            needWait = true;
        }
        context->getMapOperationsStorage().removeHandler(this);
    }

    // User callbacks may touch the object's memory, so pending GPU work must
    // be finished before they run.
    if (!destructorCallbacks.empty()) {
        needWait = true;
    }

    // Redescribed objects do not own their allocations; skip all freeing.
    if (!isObjectRedescribed) {
        if (peekSharingHandler()) {
            peekSharingHandler()->releaseReusedGraphicsAllocation();
        }

        needWait |= multiGraphicsAllocation.getGraphicsAllocations().size() > 1u;
        for (auto graphicsAllocation : multiGraphicsAllocation.getGraphicsAllocations()) {
            auto rootDeviceIndex = graphicsAllocation ? graphicsAllocation->getRootDeviceIndex() : 0;
            bool doAsyncDestructions = DebugManager.flags.EnableAsyncDestroyAllocations.get();
            // Only free allocations we exclusively own: not a sub-object,
            // not SVM host memory, and not reused by a sharing handler.
            if (graphicsAllocation && !associatedMemObject && !isHostPtrSVM && graphicsAllocation->peekReuseCount() == 0) {
                memoryManager->removeAllocationFromHostPtrManager(graphicsAllocation);
                if (!doAsyncDestructions) {
                    needWait = true;
                }
                if (needWait && graphicsAllocation->isUsed()) {
                    memoryManager->waitForEnginesCompletion(*graphicsAllocation);
                }
                destroyGraphicsAllocation(graphicsAllocation, doAsyncDestructions);
                graphicsAllocation = nullptr;
            } else if (graphicsAllocation && context->getBufferPoolAllocator().isPoolBuffer(associatedMemObject)) {
                // Pool-backed chunk: the pool owns the allocation, but we must
                // still wait so the chunk can be safely recycled below.
                memoryManager->waitForEnginesCompletion(*graphicsAllocation);
            }
            if (!associatedMemObject) {
                releaseMapAllocation(rootDeviceIndex, doAsyncDestructions);
            }
            if (mcsAllocation) {
                destroyGraphicsAllocation(mcsAllocation, false);
            }
        }
        if (associatedMemObject) {
            associatedMemObject->decRefInternal();
            // Return this chunk (identified by its offset/size) to the pool.
            context->getBufferPoolAllocator().tryFreeFromPoolBuffer(associatedMemObject, this->offset, this->sizeInPoolAllocator);
        }
        if (!associatedMemObject) {
            releaseAllocatedMapPtr();
        }
    }

    destructorCallbacks.invoke(this);

    // Pool buffers do not hold a context reference of their own
    // (avoids a refcount cycle between context and its buffer pool).
    const bool needDecrementContextRefCount = !context->getBufferPoolAllocator().isPoolBuffer(this);
    if (needDecrementContextRefCount) {
        context->decRefInternal();
    }
}
|
|
|
|
// Implements clGetMemObjectInfo for this object.
// Selects the source value/size for the requested paramName, then copies it
// into paramValue via GetInfo::getInfo and reports the required size through
// paramValueSizeRet.
// Returns CL_SUCCESS, or the CL error mapped from the GetInfo status
// (e.g. buffer too small / unknown param).
cl_int MemObj::getMemObjectInfo(cl_mem_info paramName,
                                size_t paramValueSize,
                                void *paramValue,
                                size_t *paramValueSizeRet) {
    cl_int retVal;
    size_t srcParamSize = GetInfo::invalidSourceSize;
    void *srcParam = nullptr;
    cl_bool usesSVMPointer;
    cl_uint refCnt = 0;
    cl_uint mapCount = 0;
    cl_mem clAssociatedMemObject = static_cast<cl_mem>(this->associatedMemObject);
    cl_context ctx = nullptr;
    uint64_t internalHandle = 0llu;
    auto allocation = getMultiGraphicsAllocation().getDefaultGraphicsAllocation();
    cl_bool usesCompression;
    // Local copy for CL_MEM_OFFSET: the member `offset` must NOT be modified
    // here — the destructor still needs the original value for
    // tryFreeFromPoolBuffer, and repeated queries must be idempotent.
    // (Previously the member itself was zeroed/decremented on each query.)
    size_t memObjOffset = this->offset;

    switch (paramName) {
    case CL_MEM_TYPE:
        srcParamSize = sizeof(memObjectType);
        srcParam = &memObjectType;
        break;

    case CL_MEM_FLAGS:
        srcParamSize = sizeof(flags);
        srcParam = &flags;
        break;

    case CL_MEM_SIZE:
        srcParamSize = sizeof(size);
        srcParam = &size;
        break;

    case CL_MEM_HOST_PTR:
        srcParamSize = sizeof(hostPtr);
        srcParam = &hostPtr;
        break;

    case CL_MEM_CONTEXT:
        srcParamSize = sizeof(context);
        ctx = context;
        srcParam = &ctx;
        break;

    case CL_MEM_USES_SVM_POINTER:
        // True only when the user-supplied host pointer is SVM memory.
        usesSVMPointer = isHostPtrSVM && isValueSet(flags, CL_MEM_USE_HOST_PTR);
        srcParamSize = sizeof(cl_bool);
        srcParam = &usesSVMPointer;
        break;

    case CL_MEM_OFFSET:
        if (nullptr != this->associatedMemObject) {
            if (this->getContext()->getBufferPoolAllocator().isPoolBuffer(this->associatedMemObject)) {
                // Pool-backed buffers hide their internal pool sub-allocation
                // offset; the user-visible offset is 0.
                memObjOffset = 0;
            } else {
                // Report the offset relative to the parent sub-buffer's base.
                memObjOffset -= this->associatedMemObject->getOffset();
            }
        }
        srcParamSize = sizeof(memObjOffset);
        srcParam = &memObjOffset;
        break;

    case CL_MEM_ASSOCIATED_MEMOBJECT:
        // The pool's backing buffer is an internal object and must not leak
        // to the user; report no parent in that case.
        if (this->getContext()->getBufferPoolAllocator().isPoolBuffer(this->associatedMemObject)) {
            clAssociatedMemObject = nullptr;
        }
        srcParamSize = sizeof(clAssociatedMemObject);
        srcParam = &clAssociatedMemObject;
        break;

    case CL_MEM_MAP_COUNT:
        srcParamSize = sizeof(mapCount);
        mapCount = static_cast<cl_uint>(getMapOperationsHandler().size());
        srcParam = &mapCount;
        break;

    case CL_MEM_REFERENCE_COUNT:
        refCnt = static_cast<cl_uint>(this->getReference());
        srcParamSize = sizeof(refCnt);
        srcParam = &refCnt;
        break;

    case CL_MEM_ALLOCATION_HANDLE_INTEL:
        multiGraphicsAllocation.getDefaultGraphicsAllocation()->peekInternalHandle(this->memoryManager, internalHandle);
        srcParamSize = sizeof(internalHandle);
        srcParam = &internalHandle;
        break;

    case CL_MEM_USES_COMPRESSION_INTEL:
        usesCompression = allocation->isCompressionEnabled();
        srcParam = &usesCompression;
        srcParamSize = sizeof(cl_bool);
        break;

    case CL_MEM_PROPERTIES:
        // Zero-terminated (key, value) pairs captured at creation time.
        srcParamSize = propertiesVector.size() * sizeof(cl_mem_properties);
        srcParam = propertiesVector.data();
        break;

    default:
        getOsSpecificMemObjectInfo(paramName, &srcParamSize, &srcParam);
        break;
    }

    auto getInfoStatus = GetInfo::getInfo(paramValue, paramValueSize, srcParam, srcParamSize);
    retVal = changeGetInfoStatusToCLResultType(getInfoStatus);
    GetInfo::setParamValueReturnSize(paramValueSizeRet, srcParamSize, getInfoStatus);

    return retVal;
}
|
|
|
|
// Registers a user callback to be invoked when this mem object is destroyed
// (see destructorCallbacks.invoke in ~MemObj). Thread-safe via `mtx`.
cl_int MemObj::setDestructorCallback(void(CL_CALLBACK *funcNotify)(cl_mem, void *),
                                     void *userData) {
    std::lock_guard<std::mutex> guard(mtx);
    destructorCallbacks.add(funcNotify, userData);
    return CL_SUCCESS;
}
|
|
|
|
void *MemObj::getCpuAddress() const {
|
|
return memoryStorage;
|
|
}
|
|
|
|
void *MemObj::getHostPtr() const {
|
|
return hostPtr;
|
|
}
|
|
|
|
// Returns the size in bytes as requested at creation time.
size_t MemObj::getSize() const {
    return size;
}
|
|
|
|
// Stores the lazily-allocated host staging pointer used for mapping
// (freed in releaseAllocatedMapPtr).
void MemObj::setAllocatedMapPtr(void *allocatedMapPtr) {
    this->allocatedMapPtr = allocatedMapPtr;
}
|
|
|
|
bool MemObj::isMemObjZeroCopy() const {
|
|
return isZeroCopy;
|
|
}
|
|
|
|
bool MemObj::isMemObjWithHostPtrSVM() const {
|
|
return isHostPtrSVM;
|
|
}
|
|
|
|
bool MemObj::isMemObjUncacheable() const {
|
|
return isValueSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
|
|
}
|
|
|
|
bool MemObj::isMemObjUncacheableForSurfaceState() const {
|
|
return isAnyBitSet(flagsIntel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE);
|
|
}
|
|
|
|
// Returns the allocation for the given root device; may be null
// (callers such as checkUsageAndReleaseOldAllocation handle that).
GraphicsAllocation *MemObj::getGraphicsAllocation(uint32_t rootDeviceIndex) const {
    return multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
}
|
|
|
|
// Destroys (GPU-usage-checked) the current allocation for rootDeviceIndex,
// unless a sharing handler still reuses it.
void MemObj::checkUsageAndReleaseOldAllocation(uint32_t rootDeviceIndex) {
    auto allocation = getGraphicsAllocation(rootDeviceIndex);
    if (allocation == nullptr) {
        return;
    }
    // Allocations with a live reuse count belong to the sharing handler.
    const bool releasable = (peekSharingHandler() == nullptr) || (allocation->peekReuseCount() == 0);
    if (releasable) {
        memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation);
    }
}
|
|
|
|
// Replaces the allocation for the new allocation's root device:
// retire the old one first, then register the new one. Runs under
// object ownership to exclude concurrent users.
void MemObj::resetGraphicsAllocation(GraphicsAllocation *newGraphicsAllocation) {
    TakeOwnershipWrapper<MemObj> ownership(*this);
    const auto rootDeviceIndex = newGraphicsAllocation->getRootDeviceIndex();
    checkUsageAndReleaseOldAllocation(rootDeviceIndex);
    multiGraphicsAllocation.addAllocation(newGraphicsAllocation);
}
|
|
|
|
// Retires and unregisters the allocation for the given root device,
// under object ownership to exclude concurrent users.
void MemObj::removeGraphicsAllocation(uint32_t rootDeviceIndex) {
    TakeOwnershipWrapper<MemObj> ownership(*this);
    checkUsageAndReleaseOldAllocation(rootDeviceIndex);
    multiGraphicsAllocation.removeAllocation(rootDeviceIndex);
}
|
|
|
|
bool MemObj::readMemObjFlagsInvalid() {
|
|
return isValueSet(flags, CL_MEM_HOST_WRITE_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
|
|
}
|
|
|
|
bool MemObj::writeMemObjFlagsInvalid() {
|
|
return isValueSet(flags, CL_MEM_HOST_READ_ONLY) || isValueSet(flags, CL_MEM_HOST_NO_ACCESS);
|
|
}
|
|
|
|
// A map request is invalid when it asks for an access direction the
// object's host-access flags forbid.
bool MemObj::mapMemObjFlagsInvalid(cl_map_flags mapFlags) {
    if ((mapFlags & CL_MAP_WRITE) && writeMemObjFlagsInvalid()) {
        return true;
    }
    if ((mapFlags & CL_MAP_READ) && readMemObjFlagsInvalid()) {
        return true;
    }
    return false;
}
|
|
|
|
// Records the minimum required size of the user host pointer.
void MemObj::setHostPtrMinSize(size_t size) {
    hostPtrMinSize = size;
}
|
|
|
|
void *MemObj::getCpuAddressForMapping() {
|
|
void *ptrToReturn = nullptr;
|
|
if (isValueSet(flags, CL_MEM_USE_HOST_PTR)) {
|
|
ptrToReturn = this->hostPtr;
|
|
} else {
|
|
ptrToReturn = this->memoryStorage;
|
|
}
|
|
return ptrToReturn;
|
|
}
|
|
void *MemObj::getCpuAddressForMemoryTransfer() {
|
|
void *ptrToReturn = nullptr;
|
|
if (isValueSet(flags, CL_MEM_USE_HOST_PTR) && this->isMemObjZeroCopy()) {
|
|
ptrToReturn = this->hostPtr;
|
|
} else {
|
|
ptrToReturn = this->memoryStorage;
|
|
}
|
|
return ptrToReturn;
|
|
}
|
|
void MemObj::releaseAllocatedMapPtr() {
|
|
if (allocatedMapPtr) {
|
|
DEBUG_BREAK_IF(isValueSet(flags, CL_MEM_USE_HOST_PTR));
|
|
memoryManager->freeSystemMemory(allocatedMapPtr);
|
|
}
|
|
allocatedMapPtr = nullptr;
|
|
}
|
|
|
|
// Destroys the map allocation for the given root device. SVM host pointers
// are not owned here and are skipped. Async destruction is only allowed for
// non-USE_HOST_PTR objects; otherwise we wait for GPU completion and free
// synchronously.
void MemObj::releaseMapAllocation(uint32_t rootDeviceIndex, bool asyncDestroy) {
    auto mapAllocation = mapAllocations.getGraphicsAllocation(rootDeviceIndex);
    if (mapAllocation == nullptr || isHostPtrSVM) {
        return;
    }
    const bool canDestroyAsync = asyncDestroy && !isValueSet(flags, CL_MEM_USE_HOST_PTR);
    if (canDestroyAsync) {
        destroyGraphicsAllocation(mapAllocation, true);
        return;
    }
    if (mapAllocation->isUsed()) {
        memoryManager->waitForEnginesCompletion(*mapAllocation);
    }
    destroyGraphicsAllocation(mapAllocation, false);
}
|
|
|
|
// Frees a graphics allocation either synchronously or via the
// GPU-usage-checked (deferred) path.
void MemObj::destroyGraphicsAllocation(GraphicsAllocation *allocation, bool asyncDestroy) {
    if (!asyncDestroy) {
        memoryManager->freeGraphicsMemory(allocation);
        return;
    }
    memoryManager->checkGpuUsageAndDestroyGraphicsAllocations(allocation);
}
|
|
|
|
// A transfer is unnecessary only when source and destination resolve to the
// same CPU address AND the command is a plain (non-map) read/write, in which
// case the copy would be a no-op.
bool MemObj::checkIfMemoryTransferIsRequired(size_t offsetInMemObject, size_t offsetInHostPtr, const void *hostPtr, cl_command_type cmdType) {
    const auto bufferStorage = ptrOffset(this->getCpuAddressForMemoryTransfer(), offsetInMemObject);
    const auto hostStorage = ptrOffset(hostPtr, offsetInHostPtr);
    if (bufferStorage != hostStorage) {
        return true;
    }
    switch (cmdType) {
    case CL_COMMAND_WRITE_BUFFER:
    case CL_COMMAND_READ_BUFFER:
    case CL_COMMAND_WRITE_BUFFER_RECT:
    case CL_COMMAND_READ_BUFFER_RECT:
    case CL_COMMAND_WRITE_IMAGE:
    case CL_COMMAND_READ_IMAGE:
        return false;
    default:
        return true;
    }
}
|
|
|
|
// Returns the base CPU pointer through which map operations address this
// object, creating the host staging memory and its MAP_ALLOCATION wrapper
// on first use for the given root device.
void *MemObj::getBasePtrForMap(uint32_t rootDeviceIndex) {
    // Sub-objects map through their parent's base pointer.
    if (associatedMemObject) {
        return associatedMemObject->getBasePtrForMap(rootDeviceIndex);
    }
    if (getFlags() & CL_MEM_USE_HOST_PTR) {
        // USE_HOST_PTR objects map directly through the user's pointer.
        return getHostPtr();
    } else {
        // Take exclusive ownership so the lazy allocation below is not raced.
        TakeOwnershipWrapper<MemObj> memObjOwnership(*this);
        if (getMapAllocation(rootDeviceIndex)) {
            return getMapAllocation(rootDeviceIndex)->getUnderlyingBuffer();
        } else {
            // The system-memory staging buffer is shared across root devices;
            // allocate it only once.
            auto memory = getAllocatedMapPtr();
            if (!memory) {
                memory = memoryManager->allocateSystemMemory(getSize(), MemoryConstants::pageSize);
                setAllocatedMapPtr(memory);
            }
            // Wrap the staging memory (allocateMemory=false: reuse `memory`)
            // in a MAP_ALLOCATION for this root device.
            AllocationProperties properties{rootDeviceIndex,
                                            false, // allocateMemory
                                            getSize(), AllocationType::MAP_ALLOCATION,
                                            false, // isMultiStorageAllocation
                                            context->getDeviceBitfieldForAllocation(rootDeviceIndex)};

            auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, memory);
            setMapAllocation(allocation);
            return getAllocatedMapPtr();
        }
    }
}
|
|
|
|
// Returns this object's map-operations handler from the context storage
// (presumably created on demand — contrast with getMapOperationsHandlerIfExists).
MapOperationsHandler &MemObj::getMapOperationsHandler() {
    return context->getMapOperationsStorage().getHandler(this);
}
|
|
|
|
// Non-creating lookup of this object's map-operations handler; null if none.
MapOperationsHandler *MemObj::getMapOperationsHandlerIfExists() {
    return context->getMapOperationsStorage().getHandlerIfExists(this);
}
|
|
|
|
bool MemObj::addMappedPtr(void *ptr, size_t ptrLength, cl_map_flags &mapFlags,
|
|
MemObjSizeArray &size, MemObjOffsetArray &offset,
|
|
uint32_t mipLevel, GraphicsAllocation *graphicsAllocation) {
|
|
return getMapOperationsHandler().add(ptr, ptrLength, mapFlags, size, offset, mipLevel, graphicsAllocation);
|
|
}
|
|
|
|
bool MemObj::findMappedPtr(void *mappedPtr, MapInfo &outMapInfo) {
|
|
return getMapOperationsHandler().find(mappedPtr, outMapInfo);
|
|
}
|
|
|
|
// Unregisters a mapped region after unmap.
void MemObj::removeMappedPtr(void *mappedPtr) {
    auto &handler = getMapOperationsHandler();
    handler.remove(mappedPtr);
}
|
|
|
|
bool MemObj::isTiledAllocation() const {
|
|
auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation();
|
|
auto gmm = graphicsAllocation->getDefaultGmm();
|
|
return gmm && (gmm->gmmResourceInfo->getTileModeSurfaceState() != 0);
|
|
}
|
|
|
|
bool MemObj::mappingOnCpuAllowed() const {
|
|
auto graphicsAllocation = multiGraphicsAllocation.getDefaultGraphicsAllocation();
|
|
return !isTiledAllocation() && !peekSharingHandler() && !isMipMapped(this) && !DebugManager.flags.DisableZeroCopyForBuffers.get() &&
|
|
!graphicsAllocation->isCompressionEnabled() && MemoryPoolHelper::isSystemMemoryPool(graphicsAllocation->getMemoryPool());
|
|
}
|
|
|
|
void MemObj::storeProperties(const cl_mem_properties *properties) {
|
|
if (properties) {
|
|
for (size_t i = 0; properties[i] != 0; i += 2) {
|
|
propertiesVector.push_back(properties[i]);
|
|
propertiesVector.push_back(properties[i + 1]);
|
|
}
|
|
propertiesVector.push_back(0);
|
|
}
|
|
}
|
|
|
|
// Frees every per-root-device allocation recorded in allocationInfo.
// Skipped entirely for parent objects (isParentObject), whose allocations
// are not owned by the caller.
void MemObj::cleanAllGraphicsAllocations(Context &context, MemoryManager &memoryManager, AllocationInfoType &allocationInfo, bool isParentObject) {
    if (isParentObject) {
        return;
    }
    for (auto &rootDeviceIndex : context.getRootDeviceIndices()) {
        auto allocation = allocationInfo[rootDeviceIndex].memory;
        if (allocation == nullptr) {
            continue;
        }
        memoryManager.removeAllocationFromHostPtrManager(allocation);
        memoryManager.freeGraphicsMemory(allocation);
    }
}
|
|
|
|
} // namespace NEO
|