/*
 * Copyright (C) 2018-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/mem_obj/buffer.h"

#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/local_memory_access_modes.h"
#include "shared/source/memory_manager/host_ptr_manager.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/os_interface/os_interface.h"

#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/source/helpers/cl_validators.h"
#include "opencl/source/mem_obj/mem_obj_helper.h"
#include "opencl/source/sharings/unified/unified_buffer.h"

namespace NEO {

// Per-core-family table of buffer creation functions, indexed by eRenderCoreFamily
// (see createBufferHw / createBufferHwFromDevice).
BufferFactoryFuncs bufferFactory[IGFX_MAX_CORE] = {};

namespace BufferFunctions {
// Replaceable entry point; defaults to Buffer::validateInputAndCreateBuffer.
ValidateInputAndCreateBufferFunc validateInputAndCreateBuffer = Buffer::validateInputAndCreateBuffer;
} // namespace BufferFunctions

// Forwards all arguments to MemObj with CL_MEM_OBJECT_BUFFER as the object type,
// then stamps the object magic and records `size` as the minimal host pointer size.
Buffer::Buffer(Context *context,
               const MemoryProperties &memoryProperties,
               cl_mem_flags flags,
               cl_mem_flags_intel flagsIntel,
               size_t size,
               void *memoryStorage,
               void *hostPtr,
               MultiGraphicsAllocation &&multiGraphicsAllocation,
               bool zeroCopy,
               bool isHostPtrSVM,
               bool isObjectRedescribed)
    : MemObj(context,
             CL_MEM_OBJECT_BUFFER,
             memoryProperties,
             flags,
             flagsIntel,
             size,
             memoryStorage,
             hostPtr,
             std::move(multiGraphicsAllocation),
             zeroCopy,
             isHostPtrSVM,
             isObjectRedescribed) {
    magic = objectMagic;
    setHostPtrMinSize(size);
}

// Creates an empty buffer object: no context, no storage, zero size.
Buffer::Buffer() : MemObj(nullptr, CL_MEM_OBJECT_BUFFER, {}, 0, 0, 0, nullptr, nullptr, 0, false, false, false) {
}

Buffer::~Buffer() = default;

// A buffer is a sub-buffer when it has an associated (parent) memory object.
bool Buffer::isSubBuffer() {
    return this->associatedMemObject != nullptr;
}

// Returns true when `offset` is an acceptable sub-buffer origin.
// For compressed allocations the offset must additionally be aligned to the device's
// memBaseAddressAlign (reported in bits, hence the division by 8).
bool Buffer::isValidSubBufferOffset(size_t offset) {
    if (multiGraphicsAllocation.getDefaultGraphicsAllocation()->isCompressionEnabled()) {
        // From spec: "origin value is aligned to the CL_DEVICE_MEM_BASE_ADDR_ALIGN value"
        if (!isAligned(offset, this->getContext()->getDevice(0)->getDeviceInfo().memBaseAddressAlign / 8u)) {
            return false;
        }
    }
    cl_uint addressAlign = 32; // 4 byte alignment
    if ((offset & (addressAlign / 8 - 1)) == 0) {
        return true;
    }

    return false;
}

// Validates the arguments of a buffer creation request and creates the buffer.
//
// On failure returns nullptr and sets `retVal` to:
//  - the validateObjects() error when `context` is not a valid context,
//  - CL_INVALID_VALUE when `flags` alone fail parsing/validation,
//  - CL_INVALID_PROPERTY when `properties`/`flagsIntel` fail parsing/validation,
//    or when an external handle has an unsupported type,
//  - CL_INVALID_BUFFER_SIZE when `size` is 0 or exceeds maxMemAllocSize without
//    the unrestricted-size flag / debug override,
//  - CL_INVALID_HOST_PTR when `hostPtr` presence does not match
//    CL_MEM_COPY_HOST_PTR / CL_MEM_USE_HOST_PTR.
// On success the original `properties` list is stored in the created buffer.
cl_mem Buffer::validateInputAndCreateBuffer(cl_context context,
                                            const cl_mem_properties *properties,
                                            cl_mem_flags flags,
                                            cl_mem_flags_intel flagsIntel,
                                            size_t size,
                                            void *hostPtr,
                                            cl_int &retVal) {
    Context *pContext = nullptr;
    retVal = validateObjects(withCastToInternal(context, &pContext));
    if (retVal != CL_SUCCESS) {
        return nullptr;
    }

    MemoryProperties memoryProperties{};
    cl_mem_alloc_flags_intel allocflags = 0;
    cl_mem_flags_intel emptyFlagsIntel = 0;
    // First pass: validate `flags` on their own (no property list, no Intel flags)
    // so that a bad `flags` value is reported as CL_INVALID_VALUE.
    if ((false == ClMemoryPropertiesHelper::parseMemoryProperties(nullptr, memoryProperties, flags, emptyFlagsIntel, allocflags,
                                                                  MemoryPropertiesHelper::ObjType::BUFFER, *pContext)) ||
        (false == MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, emptyFlagsIntel, *pContext))) {
        retVal = CL_INVALID_VALUE;
        return nullptr;
    }

    // Second pass: include the property list and Intel flags; failures here are
    // attributed to the properties.
    if ((false == ClMemoryPropertiesHelper::parseMemoryProperties(properties, memoryProperties, flags, flagsIntel, allocflags,
                                                                  MemoryPropertiesHelper::ObjType::BUFFER, *pContext)) ||
        (false == MemObjHelper::validateMemoryPropertiesForBuffer(memoryProperties, flags, flagsIntel, *pContext))) {
        retVal = CL_INVALID_PROPERTY;
        return nullptr;
    }

    auto pDevice = pContext->getDevice(0);
    bool allowCreateBuffersWithUnrestrictedSize = isValueSet(flags, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) ||
                                                  isValueSet(flagsIntel, CL_MEM_ALLOW_UNRESTRICTED_SIZE_INTEL) ||
                                                  DebugManager.flags.AllowUnrestrictedSize.get();

    if (size == 0 || (size > pDevice->getDevice().getDeviceInfo().maxMemAllocSize && !allowCreateBuffersWithUnrestrictedSize)) {
        retVal = CL_INVALID_BUFFER_SIZE;
        return nullptr;
    }

    /* Check the host ptr and data */
    bool expectHostPtr = (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) != 0;
    if ((hostPtr == nullptr) == expectHostPtr) {
        retVal = CL_INVALID_HOST_PTR;
        return nullptr;
    }

    // create the buffer
    Buffer *pBuffer = nullptr;
    UnifiedSharingMemoryDescription extMem{};

    if (memoryProperties.handle) {
        // An external handle was supplied: import it as a shared unified buffer.
        if (validateHandleType(memoryProperties, extMem)) {
            extMem.handle = reinterpret_cast<void *>(memoryProperties.handle);
            extMem.size = size;
            pBuffer = UnifiedBuffer::createSharedUnifiedBuffer(pContext, flags, extMem, &retVal);
        } else {
            retVal = CL_INVALID_PROPERTY;
            return nullptr;
        }
    } else {
        pBuffer = create(pContext, memoryProperties, flags, flagsIntel, size, hostPtr, retVal);
    }

    if (retVal == CL_SUCCESS) {
        pBuffer->storeProperties(properties);
    }

    return pBuffer;
}

// Convenience overload: derives MemoryProperties from `flags` for the context's first
// device and forwards the caller-provided `bufferCreateArgs`.
Buffer *Buffer::create(Context *context,
                       cl_mem_flags flags,
                       size_t size,
                       void *hostPtr,
                       AdditionalBufferCreateArgs &bufferCreateArgs,
                       cl_int &errcodeRet) {
    return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
                  flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet);
}

// Convenience overload: same as above with default-initialized AdditionalBufferCreateArgs.
Buffer *Buffer::create(Context *context,
                       cl_mem_flags flags,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    AdditionalBufferCreateArgs bufferCreateArgs{};
    return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
                  flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet);
}

// Copies `size` bytes from `hostPtr` into `memory` at the buffer's offset.
//
// System-memory, non-compressed allocations are copied with a plain memcpy_s.
// Otherwise (compressed or not in a system memory pool) the copy is attempted, in order:
//  1. on the CPU through a locked pointer, when allowed (or forced by the
//     CopyHostPtrOnCpu debug flag) and the lock succeeds,
//  2. with the blitter, when fully supported and the allocation is in local memory,
//  3. with a blocking enqueueWriteBuffer on the context's special queue.
// Returns false and sets `errcodeRet` to CL_OUT_OF_RESOURCES only when step 3 fails.
bool inline copyHostPointer(Buffer *buffer,
                            size_t size,
                            void *hostPtr,
                            GraphicsAllocation *memory,
                            GraphicsAllocation *mapAllocation,
                            uint32_t rootDeviceIndex,
                            bool isCompressionEnabled,
                            bool implicitScalingEnabled,
                            cl_int &errcodeRet) {
    const bool isLocalMemory = !MemoryPoolHelper::isSystemMemoryPool(memory->getMemoryPool());
    const bool gpuCopyRequired = isCompressionEnabled || isLocalMemory;
    if (gpuCopyRequired) {
        auto context = buffer->getContext();
        auto &device = context->getDevice(0u)->getDevice();
        auto &hwInfo = device.getHardwareInfo();
        auto &productHelper = device.getProductHelper();
        auto &osInterface = device.getRootDeviceEnvironment().osInterface;
        bool isLockable = true;
        if (osInterface) {
            isLockable = osInterface->isLockablePointer(memory->storageInfo.isLockable);
        }

        bool copyOnCpuAllowed = implicitScalingEnabled == false &&
                                size <= Buffer::maxBufferSizeForCopyOnCpu &&
                                isCompressionEnabled == false &&
                                productHelper.getLocalMemoryAccessMode(hwInfo) != LocalMemoryAccessMode::CpuAccessDisallowed &&
                                isLockable;
        // Debug override: any value other than -1 replaces the computed decision.
        if (DebugManager.flags.CopyHostPtrOnCpu.get() != -1) {
            copyOnCpuAllowed = DebugManager.flags.CopyHostPtrOnCpu.get() == 1;
        }
        if (auto lockedPointer = copyOnCpuAllowed ? device.getMemoryManager()->lockResource(memory) : nullptr) {
            memcpy_s(ptrOffset(lockedPointer, buffer->getOffset()), size, hostPtr, size);
            memory->setAubWritable(true, GraphicsAllocation::defaultBank);
            memory->setTbxWritable(true, GraphicsAllocation::defaultBank);
            return true;
        } else {
            auto blitMemoryToAllocationResult = BlitOperationResult::Unsupported;

            if (productHelper.isBlitterFullySupported(hwInfo) && isLocalMemory) {
                blitMemoryToAllocationResult = BlitHelperFunctions::blitMemoryToAllocation(device, memory, buffer->getOffset(), hostPtr, {size, 1, 1});
            }

            // Fall back to a blocking write on the special queue when the blit was
            // not attempted or did not succeed.
            if (blitMemoryToAllocationResult != BlitOperationResult::Success) {
                auto cmdQ = context->getSpecialQueue(rootDeviceIndex);
                if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(buffer, CL_TRUE, buffer->getOffset(), size, hostPtr, mapAllocation, 0, nullptr, nullptr)) {
                    errcodeRet = CL_OUT_OF_RESOURCES;
                    return false;
                }
            }
            return true;
        }
    } else {
        memcpy_s(ptrOffset(memory->getUnderlyingBuffer(), buffer->getOffset()), size, hostPtr, size);
        return true;
    }
    return false;
}

// Overload using default-initialized AdditionalBufferCreateArgs.
Buffer *Buffer::create(Context *context,
                       const MemoryProperties &memoryProperties,
                       cl_mem_flags flags,
                       cl_mem_flags_intel flagsIntel,
                       size_t size,
                       void *hostPtr,
                       cl_int &errcodeRet) {
    AdditionalBufferCreateArgs bufferCreateArgs{};
    return create(context, memoryProperties, flags, flagsIntel, size, hostPtr, bufferCreateArgs, errcodeRet);
}

// Main buffer creation routine.
//
// 1. When implicit scaling is off and neither useHostPtr nor forceHostMemory is set,
//    tries to sub-allocate from the context's buffer pool (copying host data if
//    copyHostPtr is set); on success returns the pooled buffer.
// 2. Otherwise creates one graphics allocation per root device of the context. The
//    first successful allocation's CPU pointer is reused as existing storage for the
//    remaining root devices.
// 3. Builds the Buffer object, creates map allocations for non-zero-copy
//    CL_MEM_USE_HOST_PTR buffers and performs the (single) host pointer copy.
//
// Returns nullptr with `errcodeRet` set on failure (CL_INVALID_HOST_PTR from
// checkMemory, CL_OUT_OF_HOST_MEMORY on allocation/object creation failure,
// CL_OUT_OF_RESOURCES from the host pointer copy).
Buffer *Buffer::create(Context *context,
                       const MemoryProperties &memoryProperties,
                       cl_mem_flags flags,
                       cl_mem_flags_intel flagsIntel,
                       size_t size,
                       void *hostPtr,
                       AdditionalBufferCreateArgs &bufferCreateArgs,
                       cl_int &errcodeRet) {

    errcodeRet = CL_SUCCESS;
    Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator();
    const bool implicitScalingEnabled = ImplicitScalingHelper::isImplicitScalingEnabled(context->getDevice(0u)->getDeviceBitfield(), true);
    const bool useHostPtr = memoryProperties.flags.useHostPtr;
    const bool copyHostPtr = memoryProperties.flags.copyHostPtr;
    if (implicitScalingEnabled == false &&
        useHostPtr == false &&
        memoryProperties.flags.forceHostMemory == false) {
        cl_int poolAllocRet = CL_SUCCESS;
        auto bufferFromPool = bufferPoolAllocator.allocateBufferFromPool(memoryProperties,
                                                                         flags,
                                                                         flagsIntel,
                                                                         size,
                                                                         hostPtr,
                                                                         poolAllocRet);
        if (CL_SUCCESS == poolAllocRet) {
            const bool needsCopy = copyHostPtr;
            if (needsCopy) {
                // Stop after the first root device for which the copy succeeds.
                for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
                    auto graphicsAllocation = bufferFromPool->getGraphicsAllocation(rootDeviceIndex);
                    auto mapAllocation = bufferFromPool->getMapAllocation(rootDeviceIndex);
                    bool isCompressionEnabled = graphicsAllocation->isCompressionEnabled();
                    if (copyHostPointer(bufferFromPool,
                                        size,
                                        hostPtr,
                                        graphicsAllocation,
                                        mapAllocation,
                                        rootDeviceIndex,
                                        isCompressionEnabled,
                                        implicitScalingEnabled,
                                        poolAllocRet)) {
                        break;
                    }
                }
            }
            if (!needsCopy || poolAllocRet == CL_SUCCESS) {
                return bufferFromPool;
            } else {
                // Copy into the pooled buffer failed: drop it and use the regular path.
                clReleaseMemObject(bufferFromPool);
            }
        }
    }

    MemoryManager *memoryManager = context->getMemoryManager();
    UNRECOVERABLE_IF(!memoryManager);

    auto maxRootDeviceIndex = context->getMaxRootDeviceIndex();
    auto multiGraphicsAllocation = MultiGraphicsAllocation(maxRootDeviceIndex);

    AllocationInfoType allocationInfos;
    allocationInfos.resize(maxRootDeviceIndex + 1ull);

    // CPU pointer of the first allocation created in the loop below; later root
    // devices create their allocation from this existing storage.
    void *allocationCpuPtr = nullptr;
    bool forceCopyHostPtr = false;

    for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
        allocationInfos[rootDeviceIndex] = {};
        auto &allocationInfo = allocationInfos[rootDeviceIndex];

        auto &executionEnvironment = memoryManager->peekExecutionEnvironment();
        auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];
        auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
        auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();

        bool compressionEnabled = MemObjHelper::isSuitableForCompression(GfxCoreHelper::compressedBuffersSupported(*hwInfo), memoryProperties, *context,
                                                                         gfxCoreHelper.isBufferSizeSuitableForCompression(size));

        allocationInfo.allocationType = getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, *context, compressionEnabled,
                                                                                          memoryManager->isLocalMemorySupported(rootDeviceIndex));

        if (allocationCpuPtr) {
            forceCopyHostPtr = !useHostPtr && !copyHostPtr;
            checkMemory(memoryProperties, size, allocationCpuPtr, errcodeRet, allocationInfo.alignementSatisfied, allocationInfo.copyMemoryFromHostPtr, memoryManager, rootDeviceIndex, forceCopyHostPtr);
        } else {
            checkMemory(memoryProperties, size, hostPtr, errcodeRet, allocationInfo.alignementSatisfied, allocationInfo.copyMemoryFromHostPtr, memoryManager, rootDeviceIndex, false);
        }

        if (errcodeRet != CL_SUCCESS) {
            cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfos, false);
            return nullptr;
        }

        if (compressionEnabled) {
            allocationInfo.zeroCopyAllowed = false;
            allocationInfo.allocateMemory = true;
        }

        if (useHostPtr) {
            if (allocationInfo.allocationType == AllocationType::BUFFER_HOST_MEMORY) {
                if (allocationInfo.alignementSatisfied) {
                    allocationInfo.zeroCopyAllowed = true;
                    allocationInfo.allocateMemory = false;
                } else {
                    allocationInfo.zeroCopyAllowed = false;
                    allocationInfo.allocateMemory = true;
                }
            }

            if (DebugManager.flags.DisableZeroCopyForUseHostPtr.get()) {
                allocationInfo.zeroCopyAllowed = false;
                allocationInfo.allocateMemory = true;
            }

            // When hostPtr belongs to an SVM allocation, reuse that allocation
            // instead of creating a new one.
            auto svmManager = context->getSVMAllocsManager();
            if (svmManager) {
                auto svmData = svmManager->getSVMAlloc(hostPtr);
                if (svmData) {
                    allocationInfo.memory = svmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
                    allocationInfo.allocationType = allocationInfo.memory->getAllocationType();
                    allocationInfo.isHostPtrSVM = true;
                    allocationInfo.zeroCopyAllowed = allocationInfo.memory->getAllocationType() == AllocationType::SVM_ZERO_COPY;
                    allocationInfo.copyMemoryFromHostPtr = false;
                    allocationInfo.allocateMemory = false;
                    allocationInfo.mapAllocation = svmData->cpuAllocation;
                }
            }
        }

        if (context->isSharedContext) {
            allocationInfo.zeroCopyAllowed = true;
            allocationInfo.copyMemoryFromHostPtr = false;
            allocationInfo.allocateMemory = false;
        }

        if (!bufferCreateArgs.doNotProvidePerformanceHints && hostPtr && context->isProvidingPerformanceHints()) {
            if (allocationInfo.zeroCopyAllowed) {
                context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size);
            } else {
                context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS, hostPtr, size, MemoryConstants::pageSize, MemoryConstants::pageSize);
            }
        }

        if (DebugManager.flags.DisableZeroCopyForBuffers.get()) {
            allocationInfo.zeroCopyAllowed = false;
        }

        if (!bufferCreateArgs.doNotProvidePerformanceHints && allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
        }

        if (!allocationInfo.memory) {
            if (allocationCpuPtr) {
                allocationInfo.allocateMemory = false;
            }

            AllocationProperties allocProperties = MemoryPropertiesHelper::getAllocationProperties(rootDeviceIndex, memoryProperties,
                                                                                                   allocationInfo.allocateMemory, size, allocationInfo.allocationType, context->areMultiStorageAllocationsPreferred(),
                                                                                                   *hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext());
            allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1;
            allocProperties.flags.preferCompressed = compressionEnabled;
            allocProperties.makeDeviceBufferLockable = bufferCreateArgs.makeAllocationLockable;

            if (allocationCpuPtr) {
                allocationInfo.memory = memoryManager->createGraphicsAllocationFromExistingStorage(allocProperties, allocationCpuPtr, multiGraphicsAllocation);
            } else {
                allocationInfo.memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr);
                if (allocationInfo.memory) {
                    allocationCpuPtr = allocationInfo.memory->getUnderlyingBuffer();
                }
            }
        }

        auto isSystemMemory = allocationInfo.memory && MemoryPoolHelper::isSystemMemoryPool(allocationInfo.memory->getMemoryPool());

        if (allocationInfo.allocateMemory && isSystemMemory) {
            memoryManager->addAllocationToHostPtrManager(allocationInfo.memory);
        }

        // if allocation failed for CL_MEM_USE_HOST_PTR case retry with non zero copy path
        if (useHostPtr && !allocationInfo.memory && Buffer::isReadOnlyMemoryPermittedByFlags(memoryProperties)) {
            allocationInfo.allocationType = AllocationType::BUFFER_HOST_MEMORY;
            allocationInfo.zeroCopyAllowed = false;
            allocationInfo.copyMemoryFromHostPtr = true;
            AllocationProperties allocProperties = MemoryPropertiesHelper::getAllocationProperties(rootDeviceIndex, memoryProperties,
                                                                                                   true, // allocateMemory
                                                                                                   size, allocationInfo.allocationType, context->areMultiStorageAllocationsPreferred(),
                                                                                                   *hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext());
            allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1;
            allocationInfo.memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
        }

        if (!allocationInfo.memory) {
            errcodeRet = CL_OUT_OF_HOST_MEMORY;
            cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfos, false);
            return nullptr;
        }

        if (!isSystemMemory) {
            allocationInfo.zeroCopyAllowed = false;
            if (hostPtr) {
                if (!allocationInfo.isHostPtrSVM) {
                    allocationInfo.copyMemoryFromHostPtr = true;
                }
            }
        } else if (allocationInfo.allocationType == AllocationType::BUFFER && !compressionEnabled) {
            allocationInfo.allocationType = AllocationType::BUFFER_HOST_MEMORY;
        }

        allocationInfo.memory->setAllocationType(allocationInfo.allocationType);
        auto isWritable = !(memoryProperties.flags.readOnly || memoryProperties.flags.hostReadOnly || memoryProperties.flags.hostNoAccess);
        allocationInfo.memory->setMemObjectsAllocationWithWritableFlags(isWritable);

        multiGraphicsAllocation.addAllocation(allocationInfo.memory);

        if (forceCopyHostPtr) {
            allocationInfo.copyMemoryFromHostPtr = false;
        }
    }

    multiGraphicsAllocation.setMultiStorage(MemoryPropertiesHelper::useMultiStorageForCrossRootDeviceAccess(context->getRootDeviceIndices().size() > 1));

    // The buffer object itself is described by the allocation of the context's first device.
    auto rootDeviceIndex = context->getDevice(0u)->getRootDeviceIndex();
    auto &allocationInfo = allocationInfos[rootDeviceIndex];
    auto memoryStorage = multiGraphicsAllocation.getDefaultGraphicsAllocation()->getUnderlyingBuffer();

    auto pBuffer = createBufferHw(context,
                                  memoryProperties,
                                  flags,
                                  flagsIntel,
                                  size,
                                  memoryStorage,
                                  (useHostPtr) ? hostPtr : nullptr,
                                  std::move(multiGraphicsAllocation),
                                  allocationInfo.zeroCopyAllowed,
                                  allocationInfo.isHostPtrSVM,
                                  false);

    if (!pBuffer) {
        errcodeRet = CL_OUT_OF_HOST_MEMORY;
        cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfos, false);
        return nullptr;
    }

    DBG_LOG(LogMemoryObject, __FUNCTION__, "Created Buffer: Handle: ", pBuffer, ", hostPtr: ", hostPtr, ", size: ", size,
            ", memoryStorage: ", allocationInfo.memory->getUnderlyingBuffer(),
            ", GPU address: ", allocationInfo.memory->getGpuAddress(),
            ", memoryPool: ", getMemoryPoolString(allocationInfo.memory));

    // Host data is copied at most once, regardless of the number of root devices.
    bool copyExecuted = false;
    for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
        auto &allocationInfo = allocationInfos[rootDeviceIndex];
        if (useHostPtr) {
            if (!allocationInfo.zeroCopyAllowed && !allocationInfo.isHostPtrSVM) {
                AllocationProperties properties{rootDeviceIndex,
                                                false, // allocateMemory
                                                size, AllocationType::MAP_ALLOCATION,
                                                false, // isMultiStorageAllocation
                                                context->getDeviceBitfieldForAllocation(rootDeviceIndex)};
                properties.flags.flushL3RequiredForRead = properties.flags.flushL3RequiredForWrite = true;
                allocationInfo.mapAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties, hostPtr);
            }
        }

        auto isCompressionEnabled = allocationInfo.memory->isCompressionEnabled();
        Buffer::provideCompressionHint(isCompressionEnabled, context, pBuffer);

        if (allocationInfo.mapAllocation) {
            pBuffer->mapAllocations.addAllocation(allocationInfo.mapAllocation);
        }
        pBuffer->setHostPtrMinSize(size);

        if (allocationInfo.copyMemoryFromHostPtr && !copyExecuted) {
            copyExecuted = copyHostPointer(pBuffer,
                                           size,
                                           hostPtr,
                                           allocationInfo.memory,
                                           allocationInfo.mapAllocation,
                                           rootDeviceIndex,
                                           isCompressionEnabled,
                                           implicitScalingEnabled,
                                           errcodeRet);
        }
    }

    if (errcodeRet != CL_SUCCESS) {
        pBuffer->release();
        return nullptr;
    }

    if (DebugManager.flags.MakeAllBuffersResident.get()) {
        for (size_t deviceNum = 0u; deviceNum < context->getNumDevices(); deviceNum++) {
            auto device = context->getDevice(deviceNum);
            auto graphicsAllocation = pBuffer->getGraphicsAllocation(device->getRootDeviceIndex());
            auto rootDeviceEnvironment = pBuffer->executionEnvironment->rootDeviceEnvironments[device->getRootDeviceIndex()].get();
            rootDeviceEnvironment->memoryOperationsInterface->makeResident(&device->getDevice(), ArrayRef<GraphicsAllocation *>(&graphicsAllocation, 1));
        }
    }

    return pBuffer;
}

// Wraps already existing (shared) allocations in a Buffer object. The buffer size is
// taken from the allocation of the context's first device; the buffer is created
// without host pointer and without zero-copy, and `sharingHandler` is attached to it.
Buffer *Buffer::createSharedBuffer(Context *context, cl_mem_flags flags, SharingHandler *sharingHandler,
                                   MultiGraphicsAllocation multiGraphicsAllocation) {
    auto rootDeviceIndex = context->getDevice(0)->getRootDeviceIndex();
    auto size = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getUnderlyingBufferSize();
    auto sharedBuffer = createBufferHw(
        context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
        flags, 0, size, nullptr, nullptr, std::move(multiGraphicsAllocation),
        false, false, false);

    sharedBuffer->setSharingHandler(sharingHandler);
    return sharedBuffer;
}

// Validates `hostPtr` against the memory flags and reports alignment / copy needs.
//
// Outputs:
//  - errcodeRet: CL_SUCCESS, or CL_INVALID_HOST_PTR when hostPtr is given without
//    useHostPtr/copyHostPtr/forceCopyHostPtr, is missing while one of them requires
//    it, or (useHostPtr) already maps to a driver allocation in the host pointer manager.
//  - alignementSatisfied: false when (useHostPtr) hostPtr or size is not cache-line
//    aligned, or hostPtr lies below the aligned-malloc minimal address.
//  - copyMemoryFromHostPtr: true when the host data must be copied into the allocation.
void Buffer::checkMemory(const MemoryProperties &memoryProperties,
                         size_t size,
                         void *hostPtr,
                         cl_int &errcodeRet,
                         bool &alignementSatisfied,
                         bool &copyMemoryFromHostPtr,
                         MemoryManager *memoryManager,
                         uint32_t rootDeviceIndex,
                         bool forceCopyHostPtr) {
    errcodeRet = CL_SUCCESS;
    alignementSatisfied = true;
    copyMemoryFromHostPtr = false;
    uintptr_t minAddress = 0;
    auto memRestrictions = memoryManager->getAlignedMallocRestrictions();
    if (memRestrictions) {
        minAddress = memRestrictions->minAddress;
    }

    if (hostPtr) {
        if (!(memoryProperties.flags.useHostPtr || memoryProperties.flags.copyHostPtr || forceCopyHostPtr)) {
            errcodeRet = CL_INVALID_HOST_PTR;
            return;
        }
    }

    if (memoryProperties.flags.useHostPtr) {
        if (hostPtr) {
            auto fragment = memoryManager->getHostPtrManager()->getFragment({hostPtr, rootDeviceIndex});
            if (fragment && fragment->driverAllocation) {
                errcodeRet = CL_INVALID_HOST_PTR;
                return;
            }
            if (alignUp(hostPtr, MemoryConstants::cacheLineSize) != hostPtr ||
                alignUp(size, MemoryConstants::cacheLineSize) != size ||
                minAddress > reinterpret_cast<uintptr_t>(hostPtr)) {
                alignementSatisfied = false;
                copyMemoryFromHostPtr = true;
            }
        } else {
            errcodeRet = CL_INVALID_HOST_PTR;
        }
    }

    if (memoryProperties.flags.copyHostPtr || forceCopyHostPtr) {
        if (hostPtr) {
            copyMemoryFromHostPtr = true;
        } else {
            errcodeRet = CL_INVALID_HOST_PTR;
        }
    }
}

// Selects the allocation type for a new buffer. Shared contexts, forceHostMemory and
// useHostPtr without local memory yield BUFFER_HOST_MEMORY and clear
// `compressionEnabled`; everything else yields BUFFER and leaves it untouched.
AllocationType Buffer::getGraphicsAllocationTypeAndCompressionPreference(const MemoryProperties &properties, Context &context,
                                                                         bool &compressionEnabled, bool isLocalMemoryEnabled) {
    if (context.isSharedContext || properties.flags.forceHostMemory) {
        compressionEnabled = false;
        return AllocationType::BUFFER_HOST_MEMORY;
    }

    if (properties.flags.useHostPtr && !isLocalMemoryEnabled) {
        compressionEnabled = false;
        return AllocationType::BUFFER_HOST_MEMORY;
    }

    return AllocationType::BUFFER;
}

bool Buffer::isReadOnlyMemoryPermittedByFlags(const MemoryProperties &properties) {
    // Host won't access or will only read and kernel will only read
    return (properties.flags.hostNoAccess || properties.flags.hostReadOnly) && properties.flags.readOnly;
}

// Creates a sub-buffer covering `region` of this buffer. The sub-buffer shares this
// buffer's allocations (copied MultiGraphicsAllocation), points at storage/hostPtr
// shifted by region->origin, and holds an internal reference on the parent.
// Always sets `errcodeRet` to CL_SUCCESS.
Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
                                cl_mem_flags_intel flagsIntel,
                                const cl_buffer_region *region,
                                cl_int &errcodeRet) {
    DEBUG_BREAK_IF(nullptr == createFunction);
    MemoryProperties memoryProperties =
        ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, &this->context->getDevice(0)->getDevice());

    auto copyMultiGraphicsAllocation = MultiGraphicsAllocation{this->multiGraphicsAllocation};
    auto buffer = createFunction(this->context, memoryProperties, flags, 0, region->size,
                                 this->memoryStorage ? ptrOffset(this->memoryStorage, region->origin) : nullptr,
                                 this->hostPtr ? ptrOffset(this->hostPtr, region->origin) : nullptr,
                                 std::move(copyMultiGraphicsAllocation),
                                 this->isZeroCopy, this->isHostPtrSVM, false);

    if (this->context->isProvidingPerformanceHints()) {
        this->context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, SUBBUFFER_SHARES_MEMORY, static_cast<cl_mem>(this));
    }

    buffer->associatedMemObject = this;
    // Offsets accumulate, so a sub-buffer's offset is relative to the underlying allocation.
    buffer->offset = region->origin + this->offset;
    buffer->setParentSharingHandler(this->getSharingHandler());
    this->incRefInternal();

    errcodeRet = CL_SUCCESS;
    return buffer;
}

// Patches the buffer's GPU address (plus its offset) into `memory` using `patchSize`
// bytes and returns the patched address.
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing) {
    // Subbuffers have offset that graphicsAllocation is not aware of
    auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
    uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
    DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) ||
                     (this->getCpuAddress() == nullptr && graphicsAllocation->peekSharedHandle())));

    patchWithRequiredSize(memory, patchSize, addressToPatch);

    return addressToPatch;
}

// Fills in zero pitches with their defaults derived from `region` and validates a
// rectangular transfer. Returns false when any region dimension is 0, a row pitch is
// smaller than region[0], a slice pitch is smaller than region[1] * row pitch or not
// a multiple of the row pitch, or the rectangle's last byte lies beyond getSize().
// `isSrcBuffer` selects whether buffer or host pitches are used for the bounds check.
bool Buffer::bufferRectPitchSet(const size_t *bufferOrigin,
                                const size_t *region,
                                size_t &bufferRowPitch,
                                size_t &bufferSlicePitch,
                                size_t &hostRowPitch,
                                size_t &hostSlicePitch,
                                bool isSrcBuffer) {
    if (bufferRowPitch == 0)
        bufferRowPitch = region[0];
    if (bufferSlicePitch == 0)
        bufferSlicePitch = region[1] * bufferRowPitch;

    if (hostRowPitch == 0)
        hostRowPitch = region[0];
    if (hostSlicePitch == 0)
        hostSlicePitch = region[1] * hostRowPitch;

    // This check must precede the modulo operations below: a zero region[0] would
    // otherwise leave a defaulted row pitch at 0.
    if (region[0] == 0 || region[1] == 0 || region[2] == 0) {
        return false;
    }

    if (bufferRowPitch < region[0] ||
        hostRowPitch < region[0]) {
        return false;
    }
    if ((bufferSlicePitch < region[1] * bufferRowPitch || bufferSlicePitch % bufferRowPitch != 0) ||
        (hostSlicePitch < region[1] * hostRowPitch || hostSlicePitch % hostRowPitch != 0)) {
        return false;
    }
    auto slicePitch = isSrcBuffer ? bufferSlicePitch : hostSlicePitch;
    auto rowPitch = isSrcBuffer ? bufferRowPitch : hostRowPitch;
    if ((bufferOrigin[2] + region[2] - 1) * slicePitch + (bufferOrigin[1] + region[1] - 1) * rowPitch + bufferOrigin[0] + region[0] > this->getSize()) {
        return false;
    }
    return true;
}

// Copies `copySize` bytes from `src` to `dst`; the same `copyOffset` is applied to both.
void Buffer::transferData(void *dst, void *src, size_t copySize, size_t copyOffset) {
    DBG_LOG(LogMemoryObject, __FUNCTION__, " hostPtr: ", hostPtr, ", size: ", copySize, ", offset: ", copyOffset, ", memoryStorage: ", memoryStorage);
    auto dstPtr = ptrOffset(dst, copyOffset);
    auto srcPtr = ptrOffset(src, copyOffset);
    memcpy_s(dstPtr, copySize, srcPtr, copySize);
}

// Copies from memoryStorage to hostPtr; only the first element of the size/offset arrays is used.
void Buffer::transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) {
    transferData(hostPtr, memoryStorage, copySize[0], copyOffset[0]);
}

// Copies from hostPtr to memoryStorage; only the first element of the size/offset arrays is used.
void Buffer::transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) {
    transferData(memoryStorage, hostPtr, copySize[0], copyOffset[0]);
}

// Returns the number of bytes, counted from the start of the host pointer, needed to
// hold the rectangle described by `origin`/`region` with the given pitches.
// NOTE(review): region[1] and region[2] are decremented without a zero check -
// callers presumably validate the region beforehand; confirm.
size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region, size_t rowPitch, size_t slicePitch) {
    size_t hostPtrOffsetInBytes = origin[2] * slicePitch + origin[1] * rowPitch + origin[0];
    size_t hostPtrRegionSizeInbytes = region[0] + rowPitch * (region[1] - 1) + slicePitch * (region[2] - 1);
    size_t hostPtrSize = hostPtrOffsetInBytes + hostPtrRegionSizeInbytes;
    return hostPtrSize;
}

// CPU read/write is disallowed when explicitly forced off, when the buffer is
// compressed on this root device, when the allocation has a shared handle, or when
// it resides in a local memory pool.
bool Buffer::isReadWriteOnCpuAllowed(const Device &device) {
    if (forceDisallowCPUCopy) {
        return false;
    }

    auto rootDeviceIndex = device.getRootDeviceIndex();

    if (this->isCompressed(rootDeviceIndex)) {
        return false;
    }

    auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);

    if (graphicsAllocation->peekSharedHandle() != 0) {
        return false;
    }

    if (graphicsAllocation->isAllocatedInLocalMemoryPool()) {
        return false;
    }

    return true;
}

// Heuristic deciding whether a transfer to/from `ptr` should be done on the CPU.
// Only allocations in a system memory pool are candidates.
bool Buffer::isReadWriteOnCpuPreferred(void *ptr, size_t size, const Device &device) {
    auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(device.getRootDeviceIndex());
    if (MemoryPoolHelper::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) {
        // if buffer is not zero copy and pointer is aligned it will be more beneficial to do the transfer on GPU
        if (!isMemObjZeroCopy() && (reinterpret_cast<uintptr_t>(ptr) & (MemoryConstants::cacheLineSize - 1)) == 0) {
            return false;
        }

        // on low power devices larger transfers are better on the GPU
        if (device.getSpecializedDevice<ClDevice>()->getDeviceInfo().platformLP && size > maxBufferSizeForReadWriteOnCpu) {
            return false;
        }
        return true;
    }

    return false;
}

// Creates the core-family specific buffer object through the factory entry selected
// by the hardware info of the context's first device. Returns nullptr when the
// factory function returns nullptr; otherwise remembers the factory function in the
// buffer (used later by createSubBuffer).
Buffer *Buffer::createBufferHw(Context *context,
                               const MemoryProperties &memoryProperties,
                               cl_mem_flags flags,
                               cl_mem_flags_intel flagsIntel,
                               size_t size,
                               void *memoryStorage,
                               void *hostPtr,
                               MultiGraphicsAllocation &&multiGraphicsAllocation,
                               bool zeroCopy,
                               bool isHostPtrSVM,
                               bool isImageRedescribed) {
    const auto device = context->getDevice(0);
    const auto &hwInfo = device->getHardwareInfo();

    auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction;
    DEBUG_BREAK_IF(nullptr == funcCreate);
    auto pBuffer = funcCreate(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation),
                              zeroCopy, isHostPtrSVM, isImageRedescribed);
    DEBUG_BREAK_IF(nullptr == pBuffer);
    if (pBuffer) {
        pBuffer->createFunction = funcCreate;
    }
    return pBuffer;
}

// Context-less variant of createBufferHw: the factory entry and memory properties are
// derived from `device`, and the buffer's offset and execution environment are set
// explicitly because there is no context to take them from.
Buffer *Buffer::createBufferHwFromDevice(const Device *device,
                                         cl_mem_flags flags,
                                         cl_mem_flags_intel flagsIntel,
                                         size_t size,
                                         void *memoryStorage,
                                         void *hostPtr,
                                         MultiGraphicsAllocation &&multiGraphicsAllocation,
                                         size_t offset,
                                         bool zeroCopy,
                                         bool isHostPtrSVM,
                                         bool isImageRedescribed) {
    const auto &hwInfo = device->getHardwareInfo();

    auto funcCreate = bufferFactory[hwInfo.platform.eRenderCoreFamily].createBufferFunction;
    DEBUG_BREAK_IF(nullptr == funcCreate);

    MemoryProperties memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, flagsIntel, 0, device);
    auto pBuffer = funcCreate(nullptr, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation),
                              zeroCopy, isHostPtrSVM, isImageRedescribed);

    pBuffer->offset = offset;
    pBuffer->executionEnvironment = device->getExecutionEnvironment();
    return pBuffer;
}

// Returns the MOCS value for this buffer. The regular OCL_BUFFER usage is chosen when
// L3 is not disabled, the object is not uncacheable for surface state, and the buffer
// is cache-line aligned (address and size), read-only, or not zero-copy; otherwise the
// cacheline-misaligned usage is returned.
uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument, uint32_t rootDeviceIndex) const {
    uint64_t bufferAddress = 0;
    size_t bufferSize = 0;
    auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
    if (graphicsAllocation) {
        bufferAddress = graphicsAllocation->getGpuAddress();
        bufferSize = graphicsAllocation->getUnderlyingBufferSize();
    } else {
        // No allocation for this root device: fall back to the host pointer and object size.
        bufferAddress = reinterpret_cast<uint64_t>(getHostPtr());
        bufferSize = getSize();
    }
    bufferAddress += this->offset;

    bool readOnlyMemObj = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
    bool alignedMemObj = isAligned<MemoryConstants::cacheLineSize>(bufferAddress) &&
                         isAligned<MemoryConstants::cacheLineSize>(bufferSize);

    auto gmmHelper = executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->getGmmHelper();
    if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
        return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
    } else {
        return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    }
}

// Returns the surface size: the buffer size plus the distance of the buffer address
// from the preceding 4-byte boundary, rounded up to 4 bytes (512 bytes when
// `alignSizeForAuxTranslation` is set).
uint32_t Buffer::getSurfaceSize(bool alignSizeForAuxTranslation, uint32_t rootDeviceIndex) const {
    auto bufferAddress = getBufferAddress(rootDeviceIndex);
    auto bufferAddressAligned = alignDown(bufferAddress, 4);
    auto bufferOffset = ptrDiff(bufferAddress, bufferAddressAligned);

    uint32_t surfaceSize = static_cast<uint32_t>(alignUp(getSize() + bufferOffset, alignSizeForAuxTranslation ? 512 : 4));
    return surfaceSize;
}

// Returns the GPU address of the buffer for the given root device, including its offset.
uint64_t Buffer::getBufferAddress(uint32_t rootDeviceIndex) const {
    // The graphics allocation for Host Ptr surface will be created in makeResident call and GPU address is expected to be the same as CPU address
    auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
    auto bufferAddress = (graphicsAllocation != nullptr) ? graphicsAllocation->getGpuAddress() : castToUint64(getHostPtr());
    bufferAddress += this->offset;
    return bufferAddress;
}

// True when the allocation for the given root device has compression enabled.
bool Buffer::isCompressed(uint32_t rootDeviceIndex) const {
    return multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->isCompressionEnabled();
}

// Programs `surfaceState` for an arbitrary memory range by building a temporary,
// context-less buffer around `svmPtr`/`gfxAlloc`, asking it to encode its stateful
// argument, and destroying it afterwards.
void Buffer::setSurfaceState(const Device *device,
                             void *surfaceState,
                             bool forceNonAuxMode,
                             bool disableL3,
                             size_t svmSize,
                             void *svmPtr,
                             size_t offset,
                             GraphicsAllocation *gfxAlloc,
                             cl_mem_flags flags,
                             cl_mem_flags_intel flagsIntel,
                             bool useGlobalAtomics,
                             bool areMultipleSubDevicesInContext) {
    auto multiGraphicsAllocation = MultiGraphicsAllocation(device->getRootDeviceIndex());
    if (gfxAlloc) {
        multiGraphicsAllocation.addAllocation(gfxAlloc);
    }
    auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false);
    buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, useGlobalAtomics, areMultipleSubDevicesInContext);
    delete buffer;
}

// Emits a neutral performance hint telling whether `buffer` is compressed. Nothing is
// emitted unless the context provides performance hints and the first device's
// hardware supports compressed buffers.
void Buffer::provideCompressionHint(bool compressionEnabled, Context *context, Buffer *buffer) {
    if (context->isProvidingPerformanceHints() && GfxCoreHelper::compressedBuffersSupported(context->getDevice(0)->getHardwareInfo())) {
        if (compressionEnabled) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_COMPRESSED, buffer);
        } else {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, BUFFER_IS_NOT_COMPRESSED, buffer);
        }
    }
}
} // namespace NEO