diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 4885b028d7..553accda9d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -26,33 +26,34 @@ template ze_result_t CommandListCoreFamilyImmediate::executeCommandListImmediateWithFlushTask(bool performMigration) { NEO::DispatchFlags dispatchFlags( - {}, //csrDependencies - nullptr, //barrierTimestampPacketNodes - {}, //pipelineSelectArgs - nullptr, //flushStampReference - NEO::QueueThrottle::MEDIUM, //throttle - this->getCommandListPreemptionMode(), //preemptionMode - this->commandContainer.lastSentNumGrfRequired, //numGrfRequired - NEO::L3CachingSettings::l3CacheOn, //l3CacheSettings - this->getThreadArbitrationPolicy(), //threadArbitrationPolicy - NEO::AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo - NEO::KernelExecutionType::NotApplicable, //kernelExecutionType - NEO::MemoryCompressionState::NotApplicable, //memoryCompressionState - NEO::QueueSliceCount::defaultSliceCount, //sliceCount - this->isSyncModeQueue, //blocking - this->isSyncModeQueue, //dcFlush - this->getCommandListSLMEnable(), //useSLM - this->isSyncModeQueue, //guardCommandBufferWithPipeControl - false, //GSBA32BitRequired - false, //requiresCoherency - false, //lowPriority - true, //implicitFlush - this->csr->isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed - false, //epilogueRequired - false, //usePerDssBackedBuffer - false, //useSingleSubdevice - false, //useGlobalAtomics - this->device->getNEODevice()->getNumAvailableDevices() //numDevicesInContext + {}, //csrDependencies + nullptr, //barrierTimestampPacketNodes + {}, //pipelineSelectArgs + nullptr, //flushStampReference + NEO::QueueThrottle::MEDIUM, //throttle + this->getCommandListPreemptionMode(), //preemptionMode + this->commandContainer.lastSentNumGrfRequired, //numGrfRequired + 
NEO::L3CachingSettings::l3CacheOn, //l3CacheSettings + this->getThreadArbitrationPolicy(), //threadArbitrationPolicy + NEO::AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo + NEO::KernelExecutionType::NotApplicable, //kernelExecutionType + NEO::MemoryCompressionState::NotApplicable, //memoryCompressionState + NEO::QueueSliceCount::defaultSliceCount, //sliceCount + this->isSyncModeQueue, //blocking + this->isSyncModeQueue, //dcFlush + this->getCommandListSLMEnable(), //useSLM + this->isSyncModeQueue, //guardCommandBufferWithPipeControl + false, //GSBA32BitRequired + false, //requiresCoherency + false, //lowPriority + true, //implicitFlush + this->csr->isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed + false, //epilogueRequired + false, //usePerDssBackedBuffer + false, //useSingleSubdevice + false, //useGlobalAtomics + this->device->getNEODevice()->getNumAvailableDevices() > 1, //areMultipleSubDevicesInContext + false //memoryMigrationRequired ); this->commandContainer.removeDuplicatesFromResidencyContainer(); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 3ca27a71a3..170924e1e7 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -35,6 +35,7 @@ #include "opencl/source/helpers/task_information.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" +#include "opencl/source/memory_manager/migration_controller.h" #include "opencl/source/program/block_kernel_manager.h" #include "opencl/source/program/printf_handler.h" @@ -289,6 +290,14 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, const EnqueueProperties enqueueProperties(false, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType), flushDependenciesForNonKernelCommand, isMarkerWithProfiling, &blitPropertiesContainer); + bool migratedMemory = false; + + if (!blockQueue && multiDispatchInfo.peekMainKernel() 
&& multiDispatchInfo.peekMainKernel()->requiresMemoryMigration()) { + for (auto &arg : multiDispatchInfo.peekMainKernel()->getMemObjectsToMigrate()) { + MigrationController::handleMigration(*this->context, getGpgpuCommandStreamReceiver(), arg.second); + migratedMemory = true; + } + } if (!blockQueue) { if (parentKernel) { processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking); @@ -395,6 +404,9 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (blocking) { waitUntilComplete(blockQueue, (blockQueue ? nullptr : printfHandler.get())); } + if (migratedMemory) { + getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); + } } template @@ -860,7 +872,8 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( usePerDssBackedBuffer, //usePerDssBackedBuffer kernel->isSingleSubdevicePreferred(), //useSingleSubdevice useGlobalAtomics, //useGlobalAtomics - kernel->areMultipleSubDevicesInContext() //areMultipleSubDevicesInContext + kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext + kernel->requiresMemoryMigration() //memoryMigrationRequired ); dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired; @@ -1054,6 +1067,7 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( surface->makeResident(getGpgpuCommandStreamReceiver()); } + auto rootDeviceIndex = getDevice().getRootDeviceIndex(); DispatchFlags dispatchFlags( {}, //csrDependencies &timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes @@ -1081,7 +1095,8 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( false, //usePerDssBackedBuffer false, //useSingleSubdevice false, //useGlobalAtomics - 1u); //numDevicesInContext + context->containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext + false); //memoryMigrationRequired if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, 
getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr); diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index e66789f2d5..0894ff2e12 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -77,7 +77,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) { false, //usePerDssBackedBuffer false, //useSingleSubdevice false, //useGlobalAtomics - 1u); //numDevicesInContext + false, //areMultipleSubDevicesInContext + false); //memoryMigrationRequired DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady); @@ -243,7 +244,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate kernel->requiresPerDssBackedBuffer(), //usePerDssBackedBuffer kernel->isSingleSubdevicePreferred(), //useSingleSubdevice kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics - kernel->areMultipleSubDevicesInContext()); //areMultipleSubDevicesInContext + kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext + kernel->requiresMemoryMigration()); //memoryMigrationRequired if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver); @@ -269,6 +271,12 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate gtpinNotifyPreFlushTask(&commandQueue); + if (kernel->requiresMemoryMigration()) { + for (auto &arg : kernel->getMemObjectsToMigrate()) { + MigrationController::handleMigration(commandQueue.getContext(), commandStreamReceiver, arg.second); + } + } + completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream, 0, *dsh, @@ -351,34 +359,36 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate } } + auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex(); 
DispatchFlags dispatchFlags( - {}, //csrDependencies - barrierNodes, //barrierTimestampPacketNodes - {}, //pipelineSelectArgs - commandQueue.flushStamp->getStampReference(), //flushStampReference - commandQueue.getThrottle(), //throttle - commandQueue.getDevice().getPreemptionMode(), //preemptionMode - GrfConfig::NotApplicable, //numGrfRequired - L3CachingSettings::NotApplicable, //l3CacheSettings - ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy - AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo - KernelExecutionType::NotApplicable, //kernelExecutionType - MemoryCompressionState::NotApplicable, //memoryCompressionState - commandQueue.getSliceCount(), //sliceCount - true, //blocking - false, //dcFlush - false, //useSLM - true, //guardCommandBufferWithPipeControl - false, //GSBA32BitRequired - false, //requiresCoherency - commandQueue.getPriority() == QueuePriority::LOW, //lowPriority - false, //implicitFlush - commandStreamReceiver.isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed - false, //epilogueRequired - false, //usePerDssBackedBuffer - false, //useSingleSubdevice - false, //useGlobalAtomics - 1u); //numDevicesInContext + {}, //csrDependencies + barrierNodes, //barrierTimestampPacketNodes + {}, //pipelineSelectArgs + commandQueue.flushStamp->getStampReference(), //flushStampReference + commandQueue.getThrottle(), //throttle + commandQueue.getDevice().getPreemptionMode(), //preemptionMode + GrfConfig::NotApplicable, //numGrfRequired + L3CachingSettings::NotApplicable, //l3CacheSettings + ThreadArbitrationPolicy::NotPresent, //threadArbitrationPolicy + AdditionalKernelExecInfo::NotApplicable, //additionalKernelExecInfo + KernelExecutionType::NotApplicable, //kernelExecutionType + MemoryCompressionState::NotApplicable, //memoryCompressionState + commandQueue.getSliceCount(), //sliceCount + true, //blocking + false, //dcFlush + false, //useSLM + true, //guardCommandBufferWithPipeControl + false, //GSBA32BitRequired + 
false, //requiresCoherency + commandQueue.getPriority() == QueuePriority::LOW, //lowPriority + false, //implicitFlush + commandStreamReceiver.isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed + false, //epilogueRequired + false, //usePerDssBackedBuffer + false, //useSingleSubdevice + false, //useGlobalAtomics + commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext + false); //memoryMigrationRequired UNRECOVERABLE_IF(!kernelOperation->blitEnqueue && !commandStreamReceiver.peekTimestampPacketWriteEnabled() && commandQueue.getContext().getRootDeviceIndices().size() == 1); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index f4342a4ec8..0a2cdcc6fc 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -773,6 +773,20 @@ void Kernel::markArgPatchedAndResolveArgs(uint32_t argIndex) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } + if (program->getContextPtr() && getContext().getRootDeviceIndices().size() > 1u && Kernel::isMemObj(kernelArguments[argIndex].type) && kernelArguments[argIndex].object) { + auto argMemObj = castToObjectOrAbort(reinterpret_cast(kernelArguments[argIndex].object)); + auto memObj = argMemObj->getHighestRootMemObj(); + auto migrateRequiredForArg = memObj->getMultiGraphicsAllocation().requiresMigrations(); + + if (migratableArgsMap.find(argIndex) == migratableArgsMap.end() && migrateRequiredForArg) { + migratableArgsMap.insert({argIndex, memObj}); + } else if (migrateRequiredForArg) { + migratableArgsMap[argIndex] = memObj; + } else { + migratableArgsMap.erase(argIndex); + } + } + resolveArgs(); } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index cc15d1cd4a..44bd1f00e2 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -424,6 +424,8 @@ class Kernel : public ReferenceTrackedObject { void setMultiDeviceKernel(MultiDeviceKernel 
*pMultiDeviceKernelToSet) { pMultiDeviceKernel = pMultiDeviceKernelToSet; } bool areMultipleSubDevicesInContext() const; + bool requiresMemoryMigration() const { return migratableArgsMap.size() > 0; } + const std::map &getMemObjectsToMigrate() const { return migratableArgsMap; } protected: struct ObjectCounts { @@ -549,6 +551,7 @@ class Kernel : public ReferenceTrackedObject { std::vector patchInfoDataList; std::unique_ptr imageTransformer; + std::map migratableArgsMap{}; bool specialPipelineSelectMode = false; bool svmAllocationsRequireCacheFlush = false; diff --git a/opencl/source/mem_obj/image.cpp b/opencl/source/mem_obj/image.cpp index a199cfc17a..47931f22c3 100644 --- a/opencl/source/mem_obj/image.cpp +++ b/opencl/source/mem_obj/image.cpp @@ -137,6 +137,7 @@ Image *Image::create(Context *context, auto &defaultHwHelper = HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily); + bool transferedMemory = false; do { size_t imageWidth = imageDesc->image_width; size_t imageHeight = 1; @@ -239,8 +240,6 @@ Image *Image::create(Context *context, AllocationInfoType allocationInfo; allocationInfo.resize(maxRootDeviceIndex + 1u); bool isParentObject = parentBuffer || parentImage; - void *cpuPtr = nullptr; - void *hostPtrForced = nullptr; for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { allocationInfo[rootDeviceIndex] = {}; @@ -290,7 +289,6 @@ Image *Image::create(Context *context, false, // allocateMemory memoryProperties, hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex)); - allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1; allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr); @@ -325,39 +323,11 @@ Image *Image::create(Context *context, } } } else { - if (context->getRootDeviceIndices().size() > 1) { - MemoryProperties memoryPropertiesToSet = memoryProperties; - memoryPropertiesToSet.flags.useHostPtr = true; - 
memoryPropertiesToSet.flags.copyHostPtr = false; - - if (cpuPtr) { - AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, - false, // allocateMemory - const_cast(memoryPropertiesToSet), hwInfo, - context->getDeviceBitfieldForAllocation(rootDeviceIndex)); - allocProperties.flags.crossRootDeviceAccess = true; - - allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, cpuPtr); - } else { - AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, - false, // allocateMemory - const_cast(memoryPropertiesToSet), hwInfo, - context->getDeviceBitfieldForAllocation(rootDeviceIndex)); - allocProperties.flags.crossRootDeviceAccess = true; - auto hostPtrForcedSize = alignUp(hostPtrMinSize, MemoryConstants::pageSize); - hostPtrForced = alignedMalloc(hostPtrForcedSize, MemoryConstants::pageSize); - allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtrForced); - if (allocationInfo[rootDeviceIndex].memory) { - cpuPtr = reinterpret_cast(allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer()); - } - } - } else { - AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, - true, // allocateMemory - memoryProperties, hwInfo, - context->getDeviceBitfieldForAllocation(rootDeviceIndex)); - allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties); - } + AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo, + true, // allocateMemory + memoryProperties, hwInfo, + context->getDeviceBitfieldForAllocation(rootDeviceIndex)); + allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties); if (allocationInfo[rootDeviceIndex].memory && 
MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool())) { allocationInfo[rootDeviceIndex].zeroCopyAllowed = true; @@ -368,9 +338,6 @@ Image *Image::create(Context *context, if (!allocationInfo[rootDeviceIndex].memory) { cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfo, isParentObject); - if (hostPtrForced) { - alignedFree(hostPtrForced); - } return image; } @@ -403,11 +370,10 @@ Image *Image::create(Context *context, auto defaultRootDeviceIndex = context->getDevice(0u)->getRootDeviceIndex(); + multiGraphicsAllocation.setMultiStorage(context->getRootDeviceIndices().size() > 1); + image = createImageHw(context, memoryProperties, flags, flagsIntel, imgInfo.size, hostPtrToSet, surfaceFormat->OCLImageFormat, imageDescriptor, allocationInfo[defaultRootDeviceIndex].zeroCopyAllowed, std::move(multiGraphicsAllocation), false, 0, 0, surfaceFormat); - if (hostPtrForced) { - image->setAllocatedMapPtr(hostPtrForced); - } for (auto &rootDeviceIndex : context->getRootDeviceIndices()) { @@ -451,7 +417,7 @@ Image *Image::create(Context *context, if (context->isProvidingPerformanceHints() && image->isMemObjZeroCopy()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS, static_cast(image)); } - if (allocationInfo[rootDeviceIndex].transferNeeded) { + if (allocationInfo[rootDeviceIndex].transferNeeded && !transferedMemory) { std::array copyOrigin = {{0, 0, 0}}; std::array copyRegion = {{imageWidth, imageHeight, std::max(imageDepth, imageCount)}}; if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { @@ -488,6 +454,7 @@ Image *Image::create(Context *context, context->getMemoryManager()->unlockResource(allocationInfo[rootDeviceIndex].memory); } } + transferedMemory = true; } if (allocationInfo[rootDeviceIndex].mapAllocation) { @@ -1548,5 +1515,22 @@ cl_int Image::checkIfDeviceSupportsImages(cl_context context) { return CL_INVALID_CONTEXT; } +void 
Image::fillImageRegion(size_t *region) const { + region[0] = imageDesc.image_width; + if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + region[1] = imageDesc.image_array_size; + } else if (Image::isImage1d(imageDesc)) { + region[1] = 1u; + } else { + region[1] = imageDesc.image_height; + } + if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + region[2] = imageDesc.image_array_size; + } else if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D) { + region[2] = imageDesc.image_depth; + } else { + region[2] = 1u; + } +} } // namespace NEO diff --git a/opencl/source/mem_obj/image.h b/opencl/source/mem_obj/image.h index 6697210233..ef7feaa68c 100644 --- a/opencl/source/mem_obj/image.h +++ b/opencl/source/mem_obj/image.h @@ -199,6 +199,8 @@ class Image : public MemObj { static cl_int checkIfDeviceSupportsImages(cl_context context); + void fillImageRegion(size_t *region) const; + protected: Image(Context *context, const MemoryProperties &memoryProperties, diff --git a/opencl/source/mem_obj/mem_obj.h b/opencl/source/mem_obj/mem_obj.h index 8a972369fa..d8519fa1a0 100644 --- a/opencl/source/mem_obj/mem_obj.h +++ b/opencl/source/mem_obj/mem_obj.h @@ -151,6 +151,12 @@ class MemObj : public BaseObject<_cl_mem> { const cl_mem_flags &getFlagsIntel() const { return flagsIntel; } const MultiGraphicsAllocation &getMultiGraphicsAllocation() const { return multiGraphicsAllocation; } static void cleanAllGraphicsAllocations(Context &context, MemoryManager &memoryManager, AllocationInfoType &allocationInfo, bool isParentObject); + MemObj *getHighestRootMemObj() { + if (!associatedMemObject) { + return this; + } + return associatedMemObject->getHighestRootMemObj(); + } protected: void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam); diff --git a/opencl/source/memory_manager/CMakeLists.txt b/opencl/source/memory_manager/CMakeLists.txt index 935b52da3a..5c0e0751ad 100644 --- a/opencl/source/memory_manager/CMakeLists.txt +++ 
b/opencl/source/memory_manager/CMakeLists.txt @@ -9,6 +9,8 @@ set(RUNTIME_SRCS_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/compression_selector_ocl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_manager_memory_sync.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_surface.h + ${CMAKE_CURRENT_SOURCE_DIR}/migration_controller.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/migration_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/resource_surface.h ) diff --git a/opencl/source/memory_manager/migration_controller.cpp b/opencl/source/memory_manager/migration_controller.cpp new file mode 100644 index 0000000000..d6fecacd37 --- /dev/null +++ b/opencl/source/memory_manager/migration_controller.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "opencl/source/memory_manager/migration_controller.h" + +#include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/memory_manager/migration_sync_data.h" + +#include "opencl/source/command_queue/command_queue.h" +#include "opencl/source/context/context.h" +#include "opencl/source/mem_obj/image.h" +#include "opencl/source/mem_obj/mem_obj.h" + +namespace NEO { +class MemoryManager; +class CommandStreamReceiver; +class MultiGraphicsAllocation; +void MigrationController::handleMigration(Context &context, CommandStreamReceiver &targetCsr, MemObj *memObj) { + auto memoryManager = targetCsr.getMemoryManager(); + auto targetRootDeviceIndex = targetCsr.getRootDeviceIndex(); + auto migrationSyncData = memObj->getMultiGraphicsAllocation().getMigrationSyncData(); + if (!migrationSyncData->isUsedByTheSameContext(targetCsr.getTagAddress())) { + migrationSyncData->waitOnCpu(); + } + if (migrationSyncData->getCurrentLocation() != targetRootDeviceIndex) { + migrateMemory(context, *memoryManager, memObj, targetRootDeviceIndex); + } + migrationSyncData->signalUsage(targetCsr.getTagAddress(), 
targetCsr.peekTaskCount() + 1); +} + +void MigrationController::migrateMemory(Context &context, MemoryManager &memoryManager, MemObj *memObj, uint32_t targetRootDeviceIndex) { + auto &multiGraphicsAllocation = memObj->getMultiGraphicsAllocation(); + auto migrationSyncData = multiGraphicsAllocation.getMigrationSyncData(); + + auto sourceRootDeviceIndex = migrationSyncData->getCurrentLocation(); + if (sourceRootDeviceIndex == std::numeric_limits::max()) { + migrationSyncData->setCurrentLocation(targetRootDeviceIndex); + return; + } + + migrationSyncData->startMigration(); + + auto srcMemory = multiGraphicsAllocation.getGraphicsAllocation(sourceRootDeviceIndex); + auto dstMemory = multiGraphicsAllocation.getGraphicsAllocation(targetRootDeviceIndex); + + auto size = srcMemory->getUnderlyingBufferSize(); + auto hostPtr = migrationSyncData->getHostPtr(); + + if (srcMemory->isAllocationLockable()) { + auto srcLockPtr = memoryManager.lockResource(srcMemory); + memcpy_s(hostPtr, size, srcLockPtr, size); + memoryManager.unlockResource(srcMemory); + } else { + + auto srcCmdQ = context.getSpecialQueue(sourceRootDeviceIndex); + if (srcMemory->getAllocationType() == GraphicsAllocation::AllocationType::IMAGE) { + auto pImage = static_cast(memObj); + size_t origin[3] = {}; + size_t region[3] = {}; + pImage->fillImageRegion(region); + + srcCmdQ->enqueueReadImage(pImage, CL_TRUE, origin, region, pImage->getHostPtrRowPitch(), pImage->getHostPtrSlicePitch(), hostPtr, nullptr, 0, nullptr, nullptr); + } else { + auto pBuffer = static_cast(memObj); + srcCmdQ->enqueueReadBuffer(pBuffer, CL_TRUE, 0u, pBuffer->getSize(), hostPtr, nullptr, 0, nullptr, nullptr); + } + srcCmdQ->finish(); + } + + if (dstMemory->isAllocationLockable()) { + auto dstLockPtr = memoryManager.lockResource(dstMemory); + memcpy_s(dstLockPtr, size, hostPtr, size); + memoryManager.unlockResource(dstMemory); + } else { + + auto dstCmdQ = context.getSpecialQueue(targetRootDeviceIndex); + if (dstMemory->getAllocationType() 
== GraphicsAllocation::AllocationType::IMAGE) { + auto pImage = static_cast(memObj); + size_t origin[3] = {}; + size_t region[3] = {}; + pImage->fillImageRegion(region); + + dstCmdQ->enqueueWriteImage(pImage, CL_TRUE, origin, region, pImage->getHostPtrRowPitch(), pImage->getHostPtrSlicePitch(), hostPtr, nullptr, 0, nullptr, nullptr); + } else { + auto pBuffer = static_cast(memObj); + dstCmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0u, pBuffer->getSize(), hostPtr, nullptr, 0, nullptr, nullptr); + } + dstCmdQ->finish(); + } + migrationSyncData->setCurrentLocation(targetRootDeviceIndex); +} +} // namespace NEO \ No newline at end of file diff --git a/opencl/source/memory_manager/migration_controller.h b/opencl/source/memory_manager/migration_controller.h new file mode 100644 index 0000000000..dc51b013fc --- /dev/null +++ b/opencl/source/memory_manager/migration_controller.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include + +namespace NEO { +class MemoryManager; +class CommandStreamReceiver; +class Context; +class MultiGraphicsAllocation; +class MemObj; +class MigrationController { + public: + static void handleMigration(Context &context, CommandStreamReceiver &targetCsr, MemObj *memObj); + static void migrateMemory(Context &context, MemoryManager &memoryManager, MemObj *memObj, uint32_t targetRootDeviceIndex); +}; +} // namespace NEO \ No newline at end of file diff --git a/opencl/test/unit_test/api/cl_create_image_tests.cpp b/opencl/test/unit_test/api/cl_create_image_tests.cpp index 78e33ebb5f..f4bd91d10a 100644 --- a/opencl/test/unit_test/api/cl_create_image_tests.cpp +++ b/opencl/test/unit_test/api/cl_create_image_tests.cpp @@ -1364,9 +1364,8 @@ INSTANTIATE_TEST_CASE_P(clCreateNon2dImageFromImageTests, using clCreateImageWithMultiDeviceContextTests = MultiRootDeviceFixture; -TEST_F(clCreateImageWithMultiDeviceContextTests, 
GivenImageCreatedWithoutHostPtrAndWithContextdWithMultiDeviceThenGraphicsAllocationsAreProperlyCreatedAndMapPtrIsSet) { +TEST_F(clCreateImageWithMultiDeviceContextTests, GivenImageCreatedWithoutHostPtrAndWithContextdWithMultiDeviceThenGraphicsAllocationsAreProperlyCreatedAndMapPtrIsNotSet) { REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); - DebugManagerStateRestore dbgRestore; std::unique_ptr image(ImageHelper::create(context.get())); @@ -1375,10 +1374,8 @@ TEST_F(clCreateImageWithMultiDeviceContextTests, GivenImageCreatedWithoutHostPtr EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(2u), nullptr); EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u), image->getMultiGraphicsAllocation().getGraphicsAllocation(2u)); - EXPECT_TRUE(MemoryPool::isSystemMemoryPool(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u)->getMemoryPool())); - EXPECT_TRUE(MemoryPool::isSystemMemoryPool(image->getMultiGraphicsAllocation().getGraphicsAllocation(2u)->getMemoryPool())); - - EXPECT_NE(image->getAllocatedMapPtr(), nullptr); + EXPECT_EQ(image->getAllocatedMapPtr(), nullptr); + EXPECT_TRUE(image->getMultiGraphicsAllocation().requiresMigrations()); } TEST_F(clCreateImageWithMultiDeviceContextTests, GivenImageCreatedWithHostPtrAndWithContextdWithMultiDeviceThenGraphicsAllocationsAreProperlyCreatedAndMapPtrIsNotSet) { @@ -1392,8 +1389,7 @@ TEST_F(clCreateImageWithMultiDeviceContextTests, GivenImageCreatedWithHostPtrAnd EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(2u), nullptr); EXPECT_NE(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u), image->getMultiGraphicsAllocation().getGraphicsAllocation(2u)); - EXPECT_TRUE(MemoryPool::isSystemMemoryPool(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u)->getMemoryPool())); - EXPECT_TRUE(MemoryPool::isSystemMemoryPool(image->getMultiGraphicsAllocation().getGraphicsAllocation(2u)->getMemoryPool())); + 
EXPECT_TRUE(image->getMultiGraphicsAllocation().requiresMigrations()); EXPECT_EQ(image->getAllocatedMapPtr(), nullptr); } diff --git a/opencl/test/unit_test/command_queue/command_queue_fixture.cpp b/opencl/test/unit_test/command_queue/command_queue_fixture.cpp index 190fef7fc5..c1a1dee1ca 100644 --- a/opencl/test/unit_test/command_queue/command_queue_fixture.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_fixture.cpp @@ -33,7 +33,6 @@ CommandQueue *CommandQueueHwFixture::createCommandQueue( CommandQueue *CommandQueueHwFixture::createCommandQueue( ClDevice *pDevice, const cl_command_queue_properties *properties) { - if (pDevice == nullptr) { if (this->device == nullptr) { this->device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment(nullptr)}; @@ -42,13 +41,20 @@ CommandQueue *CommandQueueHwFixture::createCommandQueue( pDevice = this->device; } - if (!context) + if (!context) { context = new MockContext(pDevice); + } + return createCommandQueue(pDevice, properties, context); +} +CommandQueue *CommandQueueHwFixture::createCommandQueue( + ClDevice *pDevice, + const cl_command_queue_properties *properties, + Context *pContext) { auto funcCreate = commandQueueFactory[pDevice->getRenderCoreFamily()]; assert(nullptr != funcCreate); - return funcCreate(context, pDevice, properties, false); + return funcCreate(pContext, pDevice, properties, false); } void CommandQueueHwFixture::SetUp() { diff --git a/opencl/test/unit_test/command_queue/command_queue_fixture.h b/opencl/test/unit_test/command_queue/command_queue_fixture.h index aad8ade81c..5be76f08c3 100644 --- a/opencl/test/unit_test/command_queue/command_queue_fixture.h +++ b/opencl/test/unit_test/command_queue/command_queue_fixture.h @@ -28,6 +28,11 @@ struct CommandQueueHwFixture { ClDevice *device, const cl_command_queue_properties *properties); + CommandQueue *createCommandQueue( + ClDevice *device, + const cl_command_queue_properties *properties, + Context *context); + virtual void SetUp(); 
virtual void SetUp(ClDevice *pDevice, cl_command_queue_properties properties); diff --git a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp index f541cb95d2..2430c87797 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_image_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/memory_manager/allocations_list.h" +#include "shared/source/memory_manager/migration_sync_data.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" @@ -92,6 +93,319 @@ HWTEST_F(EnqueueReadImageTest, whenEnqueueReadImageThenBuiltinKernelIsResolved) pCmdQ->finish(); } +HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageThenKernelRequiresMigration) { + + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + UserEvent userEvent{}; + cl_event inputEvent = &userEvent; + cl_event outputEvent{}; + + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, + EnqueueReadImageTraits::origin, + EnqueueReadImageTraits::region, + EnqueueReadImageTraits::rowPitch, + EnqueueReadImageTraits::slicePitch, + EnqueueReadImageTraits::hostPtr, + EnqueueReadImageTraits::mapAllocation, + 1u, + &inputEvent, + &outputEvent); + + auto pEvent = castToObject(outputEvent); + auto pCommand = static_cast(pEvent->peekCommand()); + auto pKernel = pCommand->peekKernel(); + EXPECT_FALSE(pKernel->Kernel::canTransformImages()); + EXPECT_TRUE(pKernel->isPatched()); + EXPECT_TRUE(pKernel->requiresMemoryMigration()); + + auto &memObjectsForMigration = pKernel->getMemObjectsToMigrate(); + ASSERT_EQ(1u, memObjectsForMigration.size()); + auto memObj = 
memObjectsForMigration.begin()->second; + for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { + EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); + } + + EXPECT_TRUE(memObj->getMultiGraphicsAllocation().requiresMigrations()); + + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + userEvent.setStatus(CL_COMPLETE); + + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + pEvent->release(); + pCmdQ1->finish(); + pCmdQ1->release(); + pImage->release(); +} + +HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCalledMultipleTimesThenEachKernelUsesDifferentImage) { + + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + UserEvent userEvent{}; + cl_event inputEvent = &userEvent; + cl_event outputEvent0{}; + cl_event outputEvent1{}; + + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, + EnqueueReadImageTraits::origin, + EnqueueReadImageTraits::region, + EnqueueReadImageTraits::rowPitch, + EnqueueReadImageTraits::slicePitch, + EnqueueReadImageTraits::hostPtr, + EnqueueReadImageTraits::mapAllocation, + 1u, + &inputEvent, + &outputEvent0); + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + auto pEvent0 = castToObject(outputEvent0); + auto pCommand0 = static_cast(pEvent0->peekCommand()); + auto pKernel0 = pCommand0->peekKernel(); + 
EXPECT_FALSE(pKernel0->Kernel::canTransformImages()); + EXPECT_TRUE(pKernel0->isPatched()); + EXPECT_TRUE(pKernel0->requiresMemoryMigration()); + + auto &memObjectsForMigration0 = pKernel0->getMemObjectsToMigrate(); + ASSERT_EQ(1u, memObjectsForMigration0.size()); + auto memObj0 = memObjectsForMigration0.begin()->second; + for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { + EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj0->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); + } + + EXPECT_TRUE(memObj0->getMultiGraphicsAllocation().requiresMigrations()); + + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, + EnqueueReadImageTraits::origin, + EnqueueReadImageTraits::region, + EnqueueReadImageTraits::rowPitch, + EnqueueReadImageTraits::slicePitch, + EnqueueReadImageTraits::hostPtr, + EnqueueReadImageTraits::mapAllocation, + 1u, + &outputEvent0, + &outputEvent1); + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + auto pEvent1 = castToObject(outputEvent1); + auto pCommand1 = static_cast(pEvent1->peekCommand()); + auto pKernel1 = pCommand1->peekKernel(); + EXPECT_FALSE(pKernel1->Kernel::canTransformImages()); + EXPECT_TRUE(pKernel1->isPatched()); + EXPECT_TRUE(pKernel1->requiresMemoryMigration()); + + auto &memObjectsForMigration1 = pKernel1->getMemObjectsToMigrate(); + ASSERT_EQ(1u, memObjectsForMigration1.size()); + auto memObj1 = memObjectsForMigration1.begin()->second; + for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { + EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj1->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); + } + + EXPECT_TRUE(memObj1->getMultiGraphicsAllocation().requiresMigrations()); + + EXPECT_NE(memObj0, memObj1); + + userEvent.setStatus(CL_COMPLETE); + + EXPECT_EQ(0u, 
pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + pEvent0->release(); + pEvent1->release(); + pCmdQ1->finish(); + pCmdQ1->release(); + pImage->release(); +} + +HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueReadImageIsCalledThenCommandQueueIsFlushed) { + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + auto &ultCsr = static_cast &>(pCmdQ1->getGpgpuCommandStreamReceiver()); + + EXPECT_FALSE(ultCsr.flushBatchedSubmissionsCalled); + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE); + + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EXPECT_TRUE(ultCsr.flushBatchedSubmissionsCalled); + pCmdQ1->finish(); + pCmdQ1->release(); + pImage->release(); +} + +HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueReadImageIsCalledThenTlbCacheIsInvalidated) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE); + + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + pCmdQ1->finish(); + + { + HardwareParse hwParser; + hwParser.parseCommands(pCmdQ1->getCS(0), 0); + auto pipeControls = findAll(hwParser.cmdList.begin(), 
hwParser.cmdList.end()); + EXPECT_LT(0u, pipeControls.size()); + bool pipeControlWithTlbInvalidateFound = false; + for (auto &pipeControl : pipeControls) { + auto pipeControlCmd = genCmdCast(*pipeControl); + if (pipeControlCmd->getTlbInvalidate()) { + EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); + pipeControlWithTlbInvalidateFound = true; + } + } + EXPECT_TRUE(pipeControlWithTlbInvalidateFound); + } + + pCmdQ1->release(); + pImage->release(); +} + +HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCalledToDifferentDevicesThenCorrectLocationIsSet) { + + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + auto pCmdQ2 = createCommandQueue(context.getDevice(1), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + auto &ultCsr1 = static_cast &>(pCmdQ1->getGpgpuCommandStreamReceiver()); + auto &ultCsr2 = static_cast &>(pCmdQ2->getGpgpuCommandStreamReceiver()); + + EXPECT_FALSE(ultCsr1.flushBatchedSubmissionsCalled); + EXPECT_FALSE(ultCsr2.flushBatchedSubmissionsCalled); + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, + EnqueueReadImageTraits::origin, + EnqueueReadImageTraits::region, + EnqueueReadImageTraits::rowPitch, + EnqueueReadImageTraits::slicePitch, + EnqueueReadImageTraits::hostPtr, + EnqueueReadImageTraits::mapAllocation, + 0u, + nullptr, + nullptr); + + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EXPECT_TRUE(ultCsr1.flushBatchedSubmissionsCalled); + EXPECT_FALSE(ultCsr2.flushBatchedSubmissionsCalled); + pCmdQ1->finish(); + + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ2, pImage, CL_FALSE, + EnqueueReadImageTraits::origin, + 
EnqueueReadImageTraits::region, + EnqueueReadImageTraits::rowPitch, + EnqueueReadImageTraits::slicePitch, + EnqueueReadImageTraits::hostPtr, + EnqueueReadImageTraits::mapAllocation, + 0u, + nullptr, + nullptr); + + EXPECT_EQ(1u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EXPECT_TRUE(ultCsr2.flushBatchedSubmissionsCalled); + pCmdQ2->finish(); + + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, + EnqueueReadImageTraits::origin, + EnqueueReadImageTraits::region, + EnqueueReadImageTraits::rowPitch, + EnqueueReadImageTraits::slicePitch, + EnqueueReadImageTraits::hostPtr, + EnqueueReadImageTraits::mapAllocation, + 0u, + nullptr, + nullptr); + + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + pCmdQ1->finish(); + pCmdQ1->release(); + pCmdQ2->release(); + pImage->release(); +} + +HWTEST_F(EnqueueReadImageTest, givenImageFromBufferThatRequiresMigrationWhenEnqueueReadImageThenBufferObjectIsTakenForMigration) { + + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pBuffer = BufferHelper<>::create(&context); + auto imageDesc = Image2dDefaults::imageDesc; + + cl_mem clBuffer = pBuffer; + imageDesc.mem_object = clBuffer; + + const_cast(pBuffer->getMultiGraphicsAllocation()).setMultiStorage(true); + + EXPECT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations()); + auto pImage = Image2dHelper<>::create(&context, &imageDesc); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + UserEvent userEvent{}; + cl_event inputEvent = &userEvent; + cl_event outputEvent{}; + + EnqueueReadImageHelper<>::enqueueReadImage(pCmdQ1, pImage, CL_FALSE, + EnqueueReadImageTraits::origin, + EnqueueReadImageTraits::region, + EnqueueReadImageTraits::rowPitch, + EnqueueReadImageTraits::slicePitch, + EnqueueReadImageTraits::hostPtr, + EnqueueReadImageTraits::mapAllocation, + 1u, + &inputEvent, + 
&outputEvent); + + auto pEvent = castToObject(outputEvent); + auto pCommand = static_cast(pEvent->peekCommand()); + auto pKernel = pCommand->peekKernel(); + EXPECT_FALSE(pKernel->Kernel::canTransformImages()); + EXPECT_TRUE(pKernel->isPatched()); + EXPECT_TRUE(pKernel->requiresMemoryMigration()); + + auto &memObjectsForMigration = pKernel->getMemObjectsToMigrate(); + ASSERT_EQ(1u, memObjectsForMigration.size()); + auto memObj = memObjectsForMigration.begin()->second; + EXPECT_EQ(static_cast(pBuffer), memObj); + + EXPECT_TRUE(memObj->getMultiGraphicsAllocation().requiresMigrations()); + + EXPECT_EQ(MigrationSyncData::locationUndefined, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + userEvent.setStatus(CL_COMPLETE); + + EXPECT_EQ(0u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + pEvent->release(); + pCmdQ1->finish(); + pCmdQ1->release(); + pImage->release(); + pBuffer->release(); +} + HWTEST_F(EnqueueReadImageTest, GivenNonBlockingEnqueueWhenReadingImageThenTaskLevelIsIncremented) { //this test case assumes IOQ auto &csr = pDevice->getUltCommandStreamReceiver(); diff --git a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp index b05c3b6978..a0843d033f 100644 --- a/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_write_image_tests.cpp @@ -7,6 +7,7 @@ #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/memory_manager/migration_sync_data.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" @@ -600,3 +601,219 @@ HWTEST_F(OneMipLevelWriteImageTests, GivenNotMippedImageWhenWritingImageThenDoNo EXPECT_TRUE(builtinOpsParamsCaptured); EXPECT_EQ(0u, usedBuiltinOpsParams.dstMipLevel); } 
+ +HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageThenBuiltinKernelIsResolved) { + + UserEvent userEvent{}; + cl_event inputEvent = &userEvent; + cl_event outputEvent{}; + + EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ, dstImage, CL_FALSE, + EnqueueWriteImageTraits::origin, + EnqueueWriteImageTraits::region, + EnqueueWriteImageTraits::rowPitch, + EnqueueWriteImageTraits::slicePitch, + EnqueueWriteImageTraits::hostPtr, + EnqueueWriteImageTraits::mapAllocation, + 1u, + &inputEvent, + &outputEvent); + + auto pEvent = castToObject(outputEvent); + auto pCommand = static_cast(pEvent->peekCommand()); + EXPECT_FALSE(pCommand->peekKernel()->Kernel::canTransformImages()); + EXPECT_TRUE(pCommand->peekKernel()->isPatched()); + userEvent.setStatus(CL_COMPLETE); + pEvent->release(); + pCmdQ->finish(); +} + +HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageThenKernelRequiresMigration) { + + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + UserEvent userEvent{}; + cl_event inputEvent = &userEvent; + cl_event outputEvent{}; + + EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE, + EnqueueWriteImageTraits::origin, + EnqueueWriteImageTraits::region, + EnqueueWriteImageTraits::rowPitch, + EnqueueWriteImageTraits::slicePitch, + EnqueueWriteImageTraits::hostPtr, + EnqueueWriteImageTraits::mapAllocation, + 1u, + &inputEvent, + &outputEvent); + + auto pEvent = castToObject(outputEvent); + auto pCommand = static_cast(pEvent->peekCommand()); + auto pKernel = pCommand->peekKernel(); + EXPECT_FALSE(pKernel->Kernel::canTransformImages()); + EXPECT_TRUE(pKernel->isPatched()); + EXPECT_TRUE(pKernel->requiresMemoryMigration()); + + auto &memObjectsForMigration = pKernel->getMemObjectsToMigrate(); + ASSERT_EQ(1u, memObjectsForMigration.size()); + auto 
memObj = memObjectsForMigration.begin()->second; + for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { + EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); + } + + EXPECT_TRUE(memObj->getMultiGraphicsAllocation().requiresMigrations()); + + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + userEvent.setStatus(CL_COMPLETE); + + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + pEvent->release(); + pCmdQ1->finish(); + pCmdQ1->release(); + pImage->release(); +} + +HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageIsCalledMultipleTimesThenEachKernelUsesDifferentImage) { + + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + UserEvent userEvent{}; + cl_event inputEvent = &userEvent; + cl_event outputEvent0{}; + cl_event outputEvent1{}; + + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE, + EnqueueWriteImageTraits::origin, + EnqueueWriteImageTraits::region, + EnqueueWriteImageTraits::rowPitch, + EnqueueWriteImageTraits::slicePitch, + EnqueueWriteImageTraits::hostPtr, + EnqueueWriteImageTraits::mapAllocation, + 1u, + &inputEvent, + &outputEvent0); + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + auto pEvent0 = castToObject(outputEvent0); + auto pCommand0 = static_cast(pEvent0->peekCommand()); + auto pKernel0 = pCommand0->peekKernel(); + 
EXPECT_FALSE(pKernel0->Kernel::canTransformImages()); + EXPECT_TRUE(pKernel0->isPatched()); + EXPECT_TRUE(pKernel0->requiresMemoryMigration()); + + auto &memObjectsForMigration0 = pKernel0->getMemObjectsToMigrate(); + ASSERT_EQ(1u, memObjectsForMigration0.size()); + auto memObj0 = memObjectsForMigration0.begin()->second; + for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { + EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj0->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); + } + + EXPECT_TRUE(memObj0->getMultiGraphicsAllocation().requiresMigrations()); + + EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE, + EnqueueWriteImageTraits::origin, + EnqueueWriteImageTraits::region, + EnqueueWriteImageTraits::rowPitch, + EnqueueWriteImageTraits::slicePitch, + EnqueueWriteImageTraits::hostPtr, + EnqueueWriteImageTraits::mapAllocation, + 1u, + &outputEvent0, + &outputEvent1); + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + auto pEvent1 = castToObject(outputEvent1); + auto pCommand1 = static_cast(pEvent1->peekCommand()); + auto pKernel1 = pCommand1->peekKernel(); + EXPECT_FALSE(pKernel1->Kernel::canTransformImages()); + EXPECT_TRUE(pKernel1->isPatched()); + EXPECT_TRUE(pKernel1->requiresMemoryMigration()); + + auto &memObjectsForMigration1 = pKernel1->getMemObjectsToMigrate(); + ASSERT_EQ(1u, memObjectsForMigration1.size()); + auto memObj1 = memObjectsForMigration1.begin()->second; + for (auto &rootDeviceIndex : context.getRootDeviceIndices()) { + EXPECT_EQ(pImage->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex), memObj1->getMultiGraphicsAllocation().getGraphicsAllocation(rootDeviceIndex)); + } + + EXPECT_TRUE(memObj1->getMultiGraphicsAllocation().requiresMigrations()); + + EXPECT_NE(memObj0, memObj1); + + userEvent.setStatus(CL_COMPLETE); + + EXPECT_EQ(0u, 
pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + pEvent0->release(); + pEvent1->release(); + pCmdQ1->finish(); + pCmdQ1->release(); + pImage->release(); +} + +HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWriteImageIsCalledThenCommandQueueIsFlushed) { + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + auto &ultCsr = static_cast &>(pCmdQ1->getGpgpuCommandStreamReceiver()); + + EXPECT_FALSE(ultCsr.flushBatchedSubmissionsCalled); + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE); + + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EXPECT_TRUE(ultCsr.flushBatchedSubmissionsCalled); + pCmdQ1->finish(); + pCmdQ1->release(); + pImage->release(); +} + +HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWriteImageIsCalledThenTlbCacheIsInvalidated) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + MockDefaultContext context; + + auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context); + + auto pImage = Image2dHelper<>::create(&context); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + EnqueueWriteImageHelper<>::enqueueWriteImage(pCmdQ1, pImage, CL_FALSE); + + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + pCmdQ1->finish(); + + { + HardwareParse hwParser; + hwParser.parseCommands(pCmdQ1->getCS(0), 0); + auto pipeControls = 
findAll(hwParser.cmdList.begin(), hwParser.cmdList.end()); + EXPECT_LT(0u, pipeControls.size()); + bool pipeControlWithTlbInvalidateFound = false; + for (auto &pipeControl : pipeControls) { + auto pipeControlCmd = genCmdCast(*pipeControl); + if (pipeControlCmd->getTlbInvalidate()) { + EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); + pipeControlWithTlbInvalidateFound = true; + } + } + EXPECT_TRUE(pipeControlWithTlbInvalidateFound); + } + + pCmdQ1->release(); + pImage->release(); +} \ No newline at end of file diff --git a/opencl/test/unit_test/command_stream/compute_mode_tests.h b/opencl/test/unit_test/command_stream/compute_mode_tests.h index 057ce8a206..3c85ead203 100644 --- a/opencl/test/unit_test/command_stream/compute_mode_tests.h +++ b/opencl/test/unit_test/command_stream/compute_mode_tests.h @@ -81,6 +81,6 @@ struct ComputeModeRequirements : public ::testing::Test { CommandStreamReceiver *csr = nullptr; std::unique_ptr device; - DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, 1}; + DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false}; GraphicsAllocation *alloc = nullptr; }; diff --git a/opencl/test/unit_test/mem_obj/image_tests.cpp b/opencl/test/unit_test/mem_obj/image_tests.cpp 
index 72f3242a4b..14709dcbb4 100644 --- a/opencl/test/unit_test/mem_obj/image_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_tests.cpp @@ -1626,35 +1626,67 @@ HWTEST_F(ImageTransformTest, givenSurfaceBaseAddressAndUnifiedSurfaceWhenSetUnif EXPECT_EQ(surfBsaseAddress + offset, surfaceState.getAuxiliarySurfaceBaseAddress()); } -using ImageMultiRootDeviceTests = MultiRootDeviceFixture; +TEST(ImageTest, givenImageWhenFillRegionIsCalledThenProperRegionIsSet) { + MockContext context; -TEST_F(ImageMultiRootDeviceTests, WhenImageIsCreatedThenImageAllocationHostPtrForcedHasCorrectAlignment) { - std::unique_ptr image(ImageHelper::create(context.get())); + { + size_t region[3] = {}; + std::unique_ptr image(Image1dHelper<>::create(&context)); - auto hostPtrForced = image->getAllocatedMapPtr(); + image->fillImageRegion(region); - ASSERT_NE(nullptr, hostPtrForced); - EXPECT_EQ(0u, (uintptr_t)hostPtrForced % MemoryConstants::pageSize); -} + EXPECT_EQ(Image1dDefaults::imageDesc.image_width, region[0]); + EXPECT_EQ(1u, region[1]); + EXPECT_EQ(1u, region[2]); + } + { + size_t region[3] = {}; + std::unique_ptr image(Image1dArrayHelper<>::create(&context)); -TEST_F(ImageMultiRootDeviceTests, WhenImageIsCreatedThenImageAllocationHasCorrectRootDeviceIndex) { - std::unique_ptr image(ImageHelper::create(context.get())); + image->fillImageRegion(region); - auto graphicsAllocation = image->getGraphicsAllocation(expectedRootDeviceIndex); - ASSERT_NE(nullptr, graphicsAllocation); - EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex()); -} + EXPECT_EQ(Image1dArrayDefaults::imageDesc.image_width, region[0]); + EXPECT_EQ(Image1dArrayDefaults::imageDesc.image_array_size, region[1]); + EXPECT_EQ(1u, region[2]); + } + { + size_t region[3] = {}; + std::unique_ptr image(Image1dBufferHelper<>::create(&context)); -TEST_F(ImageMultiRootDeviceTests, WhenImageIsCreatedWithoutHostPtrThenImageMultiGraphicsAllocationIsCreatedInSystemMemoryPool) { - 
REQUIRE_IMAGES_OR_SKIP(defaultHwInfo); + image->fillImageRegion(region); - std::unique_ptr image(ImageHelper::create(context.get())); + EXPECT_EQ(Image1dBufferDefaults::imageDesc.image_width, region[0]); + EXPECT_EQ(1u, region[1]); + EXPECT_EQ(1u, region[2]); + } + { + size_t region[3] = {}; + std::unique_ptr image(Image2dHelper<>::create(&context)); - EXPECT_TRUE(MemoryPool::isSystemMemoryPool(image->getMultiGraphicsAllocation().getGraphicsAllocation(1u)->getMemoryPool())); - EXPECT_TRUE(MemoryPool::isSystemMemoryPool(image->getMultiGraphicsAllocation().getGraphicsAllocation(2u)->getMemoryPool())); + image->fillImageRegion(region); - auto graphicsAllocation1 = image->getMultiGraphicsAllocation().getGraphicsAllocation(1u); - auto graphicsAllocation2 = image->getMultiGraphicsAllocation().getGraphicsAllocation(2u); + EXPECT_EQ(Image2dDefaults::imageDesc.image_width, region[0]); + EXPECT_EQ(Image2dDefaults::imageDesc.image_height, region[1]); + EXPECT_EQ(1u, region[2]); + } + { + size_t region[3] = {}; + std::unique_ptr image(Image2dArrayHelper<>::create(&context)); - EXPECT_EQ(graphicsAllocation2->getUnderlyingBuffer(), graphicsAllocation1->getUnderlyingBuffer()); -} + image->fillImageRegion(region); + + EXPECT_EQ(Image2dArrayDefaults::imageDesc.image_width, region[0]); + EXPECT_EQ(Image2dArrayDefaults::imageDesc.image_height, region[1]); + EXPECT_EQ(Image2dArrayDefaults::imageDesc.image_array_size, region[2]); + } + { + size_t region[3] = {}; + std::unique_ptr image(Image3dHelper<>::create(&context)); + + image->fillImageRegion(region); + + EXPECT_EQ(Image3dDefaults::imageDesc.image_width, region[0]); + EXPECT_EQ(Image3dDefaults::imageDesc.image_height, region[1]); + EXPECT_EQ(Image3dDefaults::imageDesc.image_depth, region[2]); + } +} \ No newline at end of file diff --git a/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp b/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp index 9df18fa4c2..910cecfb86 100644 --- 
a/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/sub_buffer_tests.cpp @@ -59,6 +59,18 @@ TEST_F(SubBufferTest, WhenCreatingSubBufferThenRefInternalCountIsIncremented) { EXPECT_EQ(1, buffer->getRefInternalCount()); } +TEST_F(SubBufferTest, givenSubBufferWhenGetHighestRootMemObjIsCalledThenProperMemObjIsReturned) { + cl_buffer_region region0 = {2, 12}; + + auto subBuffer = buffer->createSubBuffer(CL_MEM_READ_ONLY, 0, ®ion0, retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_EQ(static_cast(buffer), buffer->getHighestRootMemObj()); + EXPECT_EQ(static_cast(buffer), subBuffer->getHighestRootMemObj()); + + subBuffer->release(); +} + TEST_F(SubBufferTest, GivenUnalignedHostPtrBufferWhenSubBufferIsCreatedThenItIsNonZeroCopy) { cl_buffer_region region = {2, 2}; cl_int retVal = 0; diff --git a/opencl/test/unit_test/memory_manager/CMakeLists.txt b/opencl/test/unit_test/memory_manager/CMakeLists.txt index e6e8092060..a943dd00fc 100644 --- a/opencl/test/unit_test/memory_manager/CMakeLists.txt +++ b/opencl/test/unit_test/memory_manager/CMakeLists.txt @@ -13,6 +13,7 @@ set(IGDRCL_SRCS_tests_memory_manager ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_allocate_in_device_pool_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory_manager_allocate_in_preferred_pool_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/migraton_controller_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/surface_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/storage_info_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unified_memory_manager_tests.cpp diff --git a/opencl/test/unit_test/memory_manager/migraton_controller_tests.cpp b/opencl/test/unit_test/memory_manager/migraton_controller_tests.cpp new file mode 100644 index 0000000000..dfba71f72e --- /dev/null +++ b/opencl/test/unit_test/memory_manager/migraton_controller_tests.cpp @@ -0,0 +1,215 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include 
"shared/source/memory_manager/migration_sync_data.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_migration_sync_data.h" +#include "shared/test/common/mocks/mock_multi_graphics_allocation.h" + +#include "opencl/source/memory_manager/migration_controller.h" +#include "opencl/test/unit_test/fixtures/buffer_fixture.h" +#include "opencl/test/unit_test/fixtures/image_fixture.h" +#include "opencl/test/unit_test/mocks/mock_context.h" +#include "opencl/test/unit_test/mocks/mock_memory_manager.h" +#include "opencl/test/unit_test/test_macros/test_checks_ocl.h" +#include "test.h" + +using namespace NEO; + +struct MigrationControllerTests : public ::testing::Test { + void SetUp() override { + pCsr0 = context.getDevice(0)->getDefaultEngine().commandStreamReceiver; + pCsr1 = context.getDevice(1)->getDefaultEngine().commandStreamReceiver; + memoryManager = static_cast(context.getMemoryManager()); + } + void TearDown() override { + } + MockDefaultContext context{true}; + CommandStreamReceiver *pCsr0 = nullptr; + CommandStreamReceiver *pCsr1 = nullptr; + MockMemoryManager *memoryManager = nullptr; +}; + +TEST_F(MigrationControllerTests, givenAllocationWithUndefinedLocationWhenHandleMigrationThenNoMigrationIsPerformedAndProperLocationIsSet) { + std::unique_ptr pImage(Image1dHelper<>::create(&context)); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + EXPECT_EQ(MigrationSyncData::locationUndefined, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + MigrationController::handleMigration(context, *pCsr0, pImage.get()); + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + EXPECT_EQ(0u, memoryManager->lockResourceCalled); + EXPECT_EQ(0u, memoryManager->unlockResourceCalled); + EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); +} + +TEST_F(MigrationControllerTests, 
givenAllocationWithDefinedLocationWhenHandleMigrationToTheSameLocationThenDontMigrateMemory) { + std::unique_ptr pImage(Image1dHelper<>::create(&context)); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + pImage->getMultiGraphicsAllocation().getMigrationSyncData()->setCurrentLocation(1); + EXPECT_EQ(1u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + MigrationController::handleMigration(context, *pCsr1, pImage.get()); + EXPECT_EQ(1u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + EXPECT_EQ(0u, memoryManager->lockResourceCalled); + EXPECT_EQ(0u, memoryManager->unlockResourceCalled); + EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); +} + +TEST_F(MigrationControllerTests, givenNotLockableImageAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaReadWriteImage) { + REQUIRE_IMAGE_SUPPORT_OR_SKIP(&context); + std::unique_ptr pImage(Image1dHelper<>::create(&context)); + EXPECT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + auto srcAllocation = pImage->getMultiGraphicsAllocation().getGraphicsAllocation(0); + auto dstAllocation = pImage->getMultiGraphicsAllocation().getGraphicsAllocation(1); + + srcAllocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable = 1; + dstAllocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable = 1; + + EXPECT_FALSE(srcAllocation->isAllocationLockable()); + EXPECT_FALSE(dstAllocation->isAllocationLockable()); + + pImage->getMultiGraphicsAllocation().getMigrationSyncData()->setCurrentLocation(0); + EXPECT_EQ(0u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + MigrationController::handleMigration(context, *pCsr1, pImage.get()); + EXPECT_EQ(1u, pImage->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + EXPECT_EQ(0u, memoryManager->lockResourceCalled); + EXPECT_EQ(0u, 
memoryManager->unlockResourceCalled); + EXPECT_EQ(1u, pCsr1->peekLatestFlushedTaskCount()); + EXPECT_EQ(1u, pCsr0->peekLatestFlushedTaskCount()); +} + +TEST_F(MigrationControllerTests, givenNotLockableBufferAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaReadWriteBuffer) { + DebugManagerStateRestore restorer; + DebugManager.flags.DoCpuCopyOnReadBuffer.set(0); + DebugManager.flags.DoCpuCopyOnWriteBuffer.set(0); + DebugManager.flags.EnableBlitterForEnqueueOperations.set(0); + std::unique_ptr pBuffer(BufferHelper<>::create(&context)); + const_cast(pBuffer->getMultiGraphicsAllocation()).setMultiStorage(true); + EXPECT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations()); + + auto srcAllocation = pBuffer->getMultiGraphicsAllocation().getGraphicsAllocation(0); + auto dstAllocation = pBuffer->getMultiGraphicsAllocation().getGraphicsAllocation(1); + + auto gmm0 = new Gmm(context.getDevice(0)->getGmmClientContext(), nullptr, 1, 0, false); + auto gmm1 = new Gmm(context.getDevice(1)->getGmmClientContext(), nullptr, 1, 0, false); + srcAllocation->setDefaultGmm(gmm0); + dstAllocation->setDefaultGmm(gmm1); + + srcAllocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable = 1; + dstAllocation->getDefaultGmm()->resourceParams.Flags.Info.NotLockable = 1; + + EXPECT_FALSE(srcAllocation->isAllocationLockable()); + EXPECT_FALSE(dstAllocation->isAllocationLockable()); + + pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->setCurrentLocation(0); + EXPECT_EQ(0u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + MigrationController::handleMigration(context, *pCsr1, pBuffer.get()); + EXPECT_EQ(1u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + EXPECT_EQ(0u, memoryManager->lockResourceCalled); + EXPECT_EQ(0u, memoryManager->unlockResourceCalled); + EXPECT_EQ(1u, pCsr1->peekLatestFlushedTaskCount()); + EXPECT_EQ(1u, 
pCsr0->peekLatestFlushedTaskCount()); +} + +TEST_F(MigrationControllerTests, givenLockableBufferAllocationWithDefinedLocationWhenHandleMigrationToDifferentLocationThenMigrateMemoryViaLockMemory) { + std::unique_ptr pBuffer(BufferHelper<>::create(&context)); + const_cast(pBuffer->getMultiGraphicsAllocation()).setMultiStorage(true); + EXPECT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations()); + + auto srcAllocation = pBuffer->getMultiGraphicsAllocation().getGraphicsAllocation(0); + auto dstAllocation = pBuffer->getMultiGraphicsAllocation().getGraphicsAllocation(1); + + EXPECT_TRUE(srcAllocation->isAllocationLockable()); + EXPECT_TRUE(dstAllocation->isAllocationLockable()); + + pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->setCurrentLocation(0); + EXPECT_EQ(0u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + MigrationController::handleMigration(context, *pCsr1, pBuffer.get()); + EXPECT_EQ(1u, pBuffer->getMultiGraphicsAllocation().getMigrationSyncData()->getCurrentLocation()); + + EXPECT_EQ(2u, memoryManager->lockResourceCalled); + EXPECT_EQ(2u, memoryManager->unlockResourceCalled); + EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); + EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); +} + +TEST_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHandlingMigrationToOtherCsrOnTheSameRootDeviceThenWaitOnCpuForTheFirstCsrCompletion) { + VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; + MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { + return new MockMigrationSyncData(size); + }; + + std::unique_ptr pImage(Image1dHelper<>::create(&context)); + + ASSERT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + auto migrationSyncData = static_cast(pImage->getMultiGraphicsAllocation().getMigrationSyncData()); + + EXPECT_EQ(0u, migrationSyncData->waitOnCpuCalled); + + 
migrationSyncData->setCurrentLocation(0); + EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); + MigrationController::handleMigration(context, *pCsr0, pImage.get()); + EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); + + EXPECT_EQ(0u, memoryManager->lockResourceCalled); + EXPECT_EQ(0u, memoryManager->unlockResourceCalled); + EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); + EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); + EXPECT_EQ(1u, migrationSyncData->waitOnCpuCalled); +} + +TEST_F(MigrationControllerTests, givenMultiGraphicsAllocationUsedInOneCsrWhenHandlingMigrationToTheSameCsrThenDontWaitOnCpu) { + VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; + MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { + return new MockMigrationSyncData(size); + }; + + std::unique_ptr pImage(Image1dHelper<>::create(&context)); + + ASSERT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + auto migrationSyncData = static_cast(pImage->getMultiGraphicsAllocation().getMigrationSyncData()); + + EXPECT_EQ(0u, migrationSyncData->waitOnCpuCalled); + + migrationSyncData->signalUsage(pCsr0->getTagAddress(), 0u); + migrationSyncData->setCurrentLocation(0); + EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); + MigrationController::handleMigration(context, *pCsr0, pImage.get()); + EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); + + EXPECT_EQ(0u, memoryManager->lockResourceCalled); + EXPECT_EQ(0u, memoryManager->unlockResourceCalled); + EXPECT_EQ(0u, pCsr1->peekLatestFlushedTaskCount()); + EXPECT_EQ(0u, pCsr0->peekLatestFlushedTaskCount()); + EXPECT_EQ(0u, migrationSyncData->waitOnCpuCalled); +} + +TEST_F(MigrationControllerTests, whenHandleMigrationThenProperTagAddressAndTaskCountIsSet) { + VariableBackup createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc}; + MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> 
MigrationSyncData * { + return new MockMigrationSyncData(size); + }; + + std::unique_ptr pImage(Image1dHelper<>::create(&context)); + + ASSERT_TRUE(pImage->getMultiGraphicsAllocation().requiresMigrations()); + + auto migrationSyncData = static_cast(pImage->getMultiGraphicsAllocation().getMigrationSyncData()); + + migrationSyncData->setCurrentLocation(0); + MigrationController::handleMigration(context, *pCsr0, pImage.get()); + + EXPECT_EQ(pCsr0->getTagAddress(), migrationSyncData->tagAddress); + EXPECT_EQ(pCsr0->peekTaskCount() + 1, migrationSyncData->latestTaskCountUsed); +} \ No newline at end of file diff --git a/opencl/test/unit_test/mocks/mock_context.cpp b/opencl/test/unit_test/mocks/mock_context.cpp index 5014ed82ee..89af7b9810 100644 --- a/opencl/test/unit_test/mocks/mock_context.cpp +++ b/opencl/test/unit_test/mocks/mock_context.cpp @@ -173,12 +173,14 @@ SchedulerKernel &MockContext::getSchedulerKernel() { return *static_cast(schedulerBuiltIn->pKernel); } -MockDefaultContext::MockDefaultContext() : MockContext(nullptr, nullptr) { +MockDefaultContext::MockDefaultContext() : MockDefaultContext(false) {} + +MockDefaultContext::MockDefaultContext(bool initSpecialQueues) : MockContext(nullptr, nullptr) { pRootDevice0 = ultClDeviceFactory.rootDevices[0]; pRootDevice1 = ultClDeviceFactory.rootDevices[1]; pRootDevice2 = ultClDeviceFactory.rootDevices[2]; cl_device_id deviceIds[] = {pRootDevice0, pRootDevice1, pRootDevice2}; - initializeWithDevices(ClDeviceVector{deviceIds, 3}, true); + initializeWithDevices(ClDeviceVector{deviceIds, 3}, !initSpecialQueues); } MockSpecializedContext::MockSpecializedContext() : MockContext(nullptr, nullptr) { diff --git a/opencl/test/unit_test/mocks/mock_context.h b/opencl/test/unit_test/mocks/mock_context.h index 0adaad2cab..8a8e66f112 100644 --- a/opencl/test/unit_test/mocks/mock_context.h +++ b/opencl/test/unit_test/mocks/mock_context.h @@ -56,6 +56,7 @@ class MockContext : public Context { struct MockDefaultContext : MockContext 
{ MockDefaultContext(); + MockDefaultContext(bool initSpecialQueues); UltClDeviceFactory ultClDeviceFactory{3, 0}; MockClDevice *pRootDevice0; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index d74323b81f..0dcbaae950 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -221,6 +221,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (updateTag) { PipeControlArgs args(dispatchFlags.dcFlush); args.notifyEnable = isUsedNotifyEnableForPostSync(); + args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( commandStreamTask, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, diff --git a/shared/source/command_stream/csr_definitions.h b/shared/source/command_stream/csr_definitions.h index e6fa1461d2..bf656e2627 100644 --- a/shared/source/command_stream/csr_definitions.h +++ b/shared/source/command_stream/csr_definitions.h @@ -56,33 +56,34 @@ struct DispatchFlags { KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP, uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP, bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP, - bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, size_t areMultipleSubDevicesInContextP) : csrDependencies(csrDependenciesP), - barrierTimestampPacketNodes(barrierTimestampPacketNodesP), - pipelineSelectArgs(pipelineSelectArgsP), - flushStampReference(flushStampReferenceP), - throttle(throttleP), - preemptionMode(preemptionModeP), - numGrfRequired(numGrfRequiredP), - l3CacheSettings(l3CacheSettingsP), - threadArbitrationPolicy(threadArbitrationPolicyP), - 
additionalKernelExecInfo(additionalKernelExecInfoP), - kernelExecutionType(kernelExecutionTypeP), - memoryCompressionState(memoryCompressionStateP), - sliceCount(sliceCountP), - blocking(blockingP), - dcFlush(dcFlushP), - useSLM(useSLMP), - guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP), - gsba32BitRequired(gsba32BitRequiredP), - requiresCoherency(requiresCoherencyP), - lowPriority(lowPriorityP), - implicitFlush(implicitFlushP), - outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP), - epilogueRequired(epilogueRequiredP), - usePerDssBackedBuffer(usePerDSSbackedBufferP), - useSingleSubdevice(useSingleSubdeviceP), - useGlobalAtomics(useGlobalAtomicsP), - areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP){}; + bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP) : csrDependencies(csrDependenciesP), + barrierTimestampPacketNodes(barrierTimestampPacketNodesP), + pipelineSelectArgs(pipelineSelectArgsP), + flushStampReference(flushStampReferenceP), + throttle(throttleP), + preemptionMode(preemptionModeP), + numGrfRequired(numGrfRequiredP), + l3CacheSettings(l3CacheSettingsP), + threadArbitrationPolicy(threadArbitrationPolicyP), + additionalKernelExecInfo(additionalKernelExecInfoP), + kernelExecutionType(kernelExecutionTypeP), + memoryCompressionState(memoryCompressionStateP), + sliceCount(sliceCountP), + blocking(blockingP), + dcFlush(dcFlushP), + useSLM(useSLMP), + guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP), + gsba32BitRequired(gsba32BitRequiredP), + requiresCoherency(requiresCoherencyP), + lowPriority(lowPriorityP), + implicitFlush(implicitFlushP), + outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP), + epilogueRequired(epilogueRequiredP), + usePerDssBackedBuffer(usePerDSSbackedBufferP), + useSingleSubdevice(useSingleSubdeviceP), + useGlobalAtomics(useGlobalAtomicsP), + 
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP), + memoryMigrationRequired(memoryMigrationRequiredP){}; CsrDependencies csrDependencies; TimestampPacketContainer *barrierTimestampPacketNodes = nullptr; @@ -112,6 +113,7 @@ struct DispatchFlags { bool useSingleSubdevice = false; bool useGlobalAtomics = false; bool areMultipleSubDevicesInContext = false; + bool memoryMigrationRequired = false; }; struct CsrSizeRequestFlags { diff --git a/shared/source/memory_manager/CMakeLists.txt b/shared/source/memory_manager/CMakeLists.txt index b126109e0c..1911ebded6 100644 --- a/shared/source/memory_manager/CMakeLists.txt +++ b/shared/source/memory_manager/CMakeLists.txt @@ -40,6 +40,8 @@ set(NEO_CORE_MEMORY_MANAGER ${CMAKE_CURRENT_SOURCE_DIR}/memory_operations_handler.h ${CMAKE_CURRENT_SOURCE_DIR}/memory_operations_status.h ${CMAKE_CURRENT_SOURCE_DIR}/memory_pool.h + ${CMAKE_CURRENT_SOURCE_DIR}/migration_sync_data.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/migration_sync_data.h ${CMAKE_CURRENT_SOURCE_DIR}/multi_graphics_allocation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/multi_graphics_allocation.h ${CMAKE_CURRENT_SOURCE_DIR}/os_agnostic_memory_manager.cpp diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index a3cf21493b..97ddd5b6dc 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -126,7 +126,7 @@ GraphicsAllocation *MemoryManager::allocateGraphicsMemoryWithHostPtr(const Alloc GraphicsAllocation *MemoryManager::allocateGraphicsMemoryForImageFromHostPtr(const AllocationData &allocationData) { bool copyRequired = isCopyRequired(*allocationData.imgInfo, allocationData.hostPtr); - if (allocationData.hostPtr && (!copyRequired || allocationData.flags.crossRootDeviceAccess)) { + if (allocationData.hostPtr && !copyRequired) { return allocateGraphicsMemoryWithHostPtr(allocationData); } return nullptr; diff --git 
a/shared/source/memory_manager/migration_sync_data.cpp b/shared/source/memory_manager/migration_sync_data.cpp new file mode 100644 index 0000000000..5c33559dd6 --- /dev/null +++ b/shared/source/memory_manager/migration_sync_data.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/memory_manager/migration_sync_data.h" + +#include "shared/source/helpers/aligned_memory.h" +#include "shared/source/helpers/constants.h" + +namespace NEO { + +MigrationSyncData::MigrationSyncData(size_t size) { + hostPtr = alignedMalloc(size, MemoryConstants::pageSize); +} +MigrationSyncData::~MigrationSyncData() { + alignedFree(hostPtr); +} + +uint32_t MigrationSyncData::getCurrentLocation() const { return currentLocation; } +bool MigrationSyncData::isUsedByTheSameContext(volatile uint32_t *tagAddress) const { return this->tagAddress == tagAddress; } + +void MigrationSyncData::setCurrentLocation(uint32_t rootDeviceIndex) { + currentLocation = rootDeviceIndex; + migrationInProgress = false; +} + +void MigrationSyncData::signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount) { + this->tagAddress = tagAddress; + latestTaskCountUsed = taskCount; +} + +void MigrationSyncData::waitOnCpu() { + while (tagAddress != nullptr) { + auto taskCount = *tagAddress; + if (taskCount >= latestTaskCountUsed) { + tagAddress = nullptr; + } else { + yield(); + } + }; +}; + +void MigrationSyncData::startMigration() { + migrationInProgress = true; +} +void MigrationSyncData::yield() const { + std::this_thread::yield(); +} +} // namespace NEO diff --git a/shared/source/memory_manager/migration_sync_data.h b/shared/source/memory_manager/migration_sync_data.h new file mode 100644 index 0000000000..7b822761b9 --- /dev/null +++ b/shared/source/memory_manager/migration_sync_data.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include 
"shared/source/utilities/reference_tracked_object.h" + +#include +#include + +namespace NEO { +class MigrationSyncData : public ReferenceTrackedObject { + public: + static constexpr uint32_t locationUndefined = std::numeric_limits::max(); + + MigrationSyncData(size_t size); + ~MigrationSyncData(); + uint32_t getCurrentLocation() const; + void startMigration(); + void setCurrentLocation(uint32_t rootDeviceIndex); + void signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount); + bool isUsedByTheSameContext(volatile uint32_t *tagAddress) const; + MOCKABLE_VIRTUAL void waitOnCpu(); + bool isMigrationInProgress() const { return migrationInProgress; } + void *getHostPtr() const { return hostPtr; } + + protected: + MOCKABLE_VIRTUAL void yield() const; + volatile uint32_t *tagAddress = nullptr; + void *hostPtr = nullptr; + uint32_t latestTaskCountUsed = 0u; + uint32_t currentLocation = locationUndefined; + bool migrationInProgress = false; +}; +} // namespace NEO \ No newline at end of file diff --git a/shared/source/memory_manager/multi_graphics_allocation.cpp b/shared/source/memory_manager/multi_graphics_allocation.cpp index 7108537aa5..7724b29271 100644 --- a/shared/source/memory_manager/multi_graphics_allocation.cpp +++ b/shared/source/memory_manager/multi_graphics_allocation.cpp @@ -7,12 +7,35 @@ #include "shared/source/memory_manager/multi_graphics_allocation.h" +#include "shared/source/gmm_helper/gmm.h" +#include "shared/source/memory_manager/migration_sync_data.h" + namespace NEO { MultiGraphicsAllocation::MultiGraphicsAllocation(uint32_t maxRootDeviceIndex) { graphicsAllocations.resize(maxRootDeviceIndex + 1); } +MultiGraphicsAllocation::MultiGraphicsAllocation(const MultiGraphicsAllocation &multiGraphicsAllocation) { + this->graphicsAllocations = multiGraphicsAllocation.graphicsAllocations; + this->migrationSyncData = multiGraphicsAllocation.migrationSyncData; + this->isMultiStorage = multiGraphicsAllocation.isMultiStorage; + if (migrationSyncData) { + 
migrationSyncData->incRefInternal(); + } +} +MultiGraphicsAllocation::MultiGraphicsAllocation(MultiGraphicsAllocation &&multiGraphicsAllocation) { + this->graphicsAllocations = std::move(multiGraphicsAllocation.graphicsAllocations); + std::swap(this->migrationSyncData, multiGraphicsAllocation.migrationSyncData); + this->isMultiStorage = multiGraphicsAllocation.isMultiStorage; +}; + +MultiGraphicsAllocation::~MultiGraphicsAllocation() { + if (migrationSyncData) { + migrationSyncData->decRefInternal(); + } +} + GraphicsAllocation *MultiGraphicsAllocation::getDefaultGraphicsAllocation() const { for (auto &allocation : graphicsAllocations) { if (allocation) { @@ -48,4 +71,22 @@ StackVec const &MultiGraphicsAllocation::getGraphicsAll return graphicsAllocations; } +void MultiGraphicsAllocation::setMultiStorage(bool value) { + isMultiStorage = value; + if (isMultiStorage && !migrationSyncData) { + migrationSyncData = createMigrationSyncDataFunc(getDefaultGraphicsAllocation()->getUnderlyingBufferSize()); + migrationSyncData->incRefInternal(); + } +} + +bool MultiGraphicsAllocation::requiresMigrations() const { + if (migrationSyncData && migrationSyncData->isMigrationInProgress()) { + return false; + } + return isMultiStorage; +} + +decltype(MultiGraphicsAllocation::createMigrationSyncDataFunc) MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * { + return new MigrationSyncData(size); +}; } // namespace NEO diff --git a/shared/source/memory_manager/multi_graphics_allocation.h b/shared/source/memory_manager/multi_graphics_allocation.h index de6e41b370..5881cfa110 100644 --- a/shared/source/memory_manager/multi_graphics_allocation.h +++ b/shared/source/memory_manager/multi_graphics_allocation.h @@ -8,11 +8,18 @@ #pragma once #include "shared/source/memory_manager/graphics_allocation.h" +#include + namespace NEO { +class MigrationSyncData; + class MultiGraphicsAllocation { public: MultiGraphicsAllocation(uint32_t maxRootDeviceIndex); + 
MultiGraphicsAllocation(const MultiGraphicsAllocation &multiGraphicsAllocation); + MultiGraphicsAllocation(MultiGraphicsAllocation &&); + ~MultiGraphicsAllocation(); GraphicsAllocation *getDefaultGraphicsAllocation() const; @@ -28,7 +35,15 @@ class MultiGraphicsAllocation { StackVec const &getGraphicsAllocations() const; + bool requiresMigrations() const; + MigrationSyncData *getMigrationSyncData() const { return migrationSyncData; } + void setMultiStorage(bool value); + + static std::function createMigrationSyncDataFunc; + protected: + bool isMultiStorage = false; + MigrationSyncData *migrationSyncData = nullptr; StackVec graphicsAllocations; }; diff --git a/shared/test/common/helpers/dispatch_flags_helper.h b/shared/test/common/helpers/dispatch_flags_helper.h index 93e7e9b4bc..8e3d7c4485 100644 --- a/shared/test/common/helpers/dispatch_flags_helper.h +++ b/shared/test/common/helpers/dispatch_flags_helper.h @@ -40,6 +40,8 @@ struct DispatchFlagsHelper { false, //usePerDssBackedBuffer false, //useSingleSubdevice false, //useGlobalAtomics - false); //areMultipleSubDevicesInContext + false, //areMultipleSubDevicesInContext + false //memoryMigrationRequired + ); } }; diff --git a/shared/test/common/mocks/CMakeLists.txt b/shared/test/common/mocks/CMakeLists.txt index cd2702aadb..a43a36423f 100644 --- a/shared/test/common/mocks/CMakeLists.txt +++ b/shared/test/common/mocks/CMakeLists.txt @@ -34,8 +34,8 @@ set(NEO_CORE_tests_mocks ${CMAKE_CURRENT_SOURCE_DIR}/mock_deferred_deleter.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_device.h - ${CMAKE_CURRENT_SOURCE_DIR}/mock_direct_submission_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_direct_submission_diagnostic_collector.h + ${CMAKE_CURRENT_SOURCE_DIR}/mock_direct_submission_hw.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_dispatch_kernel_encoder_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mock_dispatch_kernel_encoder_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_elf.h @@ -48,6 +48,8 @@ set(NEO_CORE_tests_mocks 
${CMAKE_CURRENT_SOURCE_DIR}/mock_graphics_allocation.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_io_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_memory_operations_handler.h + ${CMAKE_CURRENT_SOURCE_DIR}/mock_migration_sync_data.h + ${CMAKE_CURRENT_SOURCE_DIR}/mock_multi_graphics_allocation.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_os_library.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_physical_address_allocator.h ${CMAKE_CURRENT_SOURCE_DIR}/mock_sip.cpp diff --git a/shared/test/common/mocks/mock_graphics_allocation.h b/shared/test/common/mocks/mock_graphics_allocation.h index a09da4c5e1..326f36782f 100644 --- a/shared/test/common/mocks/mock_graphics_allocation.h +++ b/shared/test/common/mocks/mock_graphics_allocation.h @@ -60,5 +60,4 @@ static inline MultiGraphicsAllocation toMultiGraphicsAllocation(GraphicsAllocati } } // namespace GraphicsAllocationHelper - } // namespace NEO diff --git a/shared/test/common/mocks/mock_migration_sync_data.h b/shared/test/common/mocks/mock_migration_sync_data.h new file mode 100644 index 0000000000..6177bcce0c --- /dev/null +++ b/shared/test/common/mocks/mock_migration_sync_data.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/memory_manager/migration_sync_data.h" + +namespace NEO { + +struct MockMigrationSyncDataWithYield : public MigrationSyncData { + using MigrationSyncData::MigrationSyncData; + void yield() const override { + (*this->tagAddress)++; + MigrationSyncData::yield(); + } +}; +struct MockMigrationSyncData : public MigrationSyncData { + using MigrationSyncData::latestTaskCountUsed; + using MigrationSyncData::MigrationSyncData; + using MigrationSyncData::tagAddress; + void waitOnCpu() override { + waitOnCpuCalled++; + MigrationSyncData::waitOnCpu(); + } + + uint32_t waitOnCpuCalled = 0u; +}; + +} // namespace NEO diff --git a/shared/test/common/mocks/mock_multi_graphics_allocation.h b/shared/test/common/mocks/mock_multi_graphics_allocation.h 
new file mode 100644 index 0000000000..ad49b690ac --- /dev/null +++ b/shared/test/common/mocks/mock_multi_graphics_allocation.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/memory_manager/multi_graphics_allocation.h" + +namespace NEO { + +struct MockMultiGraphicsAllocation : public MultiGraphicsAllocation { + using MultiGraphicsAllocation::graphicsAllocations; + using MultiGraphicsAllocation::migrationSyncData; + using MultiGraphicsAllocation::MultiGraphicsAllocation; +}; + +} // namespace NEO diff --git a/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp b/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp index a5648c8968..b7ba196637 100644 --- a/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp +++ b/shared/test/unit_test/memory_manager/multi_graphics_allocation_tests.cpp @@ -6,7 +6,12 @@ */ #include "shared/source/helpers/aligned_memory.h" +#include "shared/source/memory_manager/migration_sync_data.h" #include "shared/source/memory_manager/multi_graphics_allocation.h" +#include "shared/test/common/mocks/mock_device.h" +#include "shared/test/common/mocks/mock_migration_sync_data.h" +#include "shared/test/common/mocks/mock_multi_graphics_allocation.h" +#include "shared/test/common/mocks/ult_device_factory.h" #include "opencl/test/unit_test/mocks/mock_memory_manager.h" @@ -14,11 +19,6 @@ using namespace NEO; -struct MockMultiGraphicsAllocation : public MultiGraphicsAllocation { - using MultiGraphicsAllocation::graphicsAllocations; - using MultiGraphicsAllocation::MultiGraphicsAllocation; -}; - TEST(MultiGraphicsAllocationTest, whenCreatingMultiGraphicsAllocationThenTheAllocationIsObtainableAsADefault) { GraphicsAllocation graphicsAllocation(1, // rootDeviceIndex GraphicsAllocation::AllocationType::BUFFER, @@ -104,3 +104,212 @@ TEST(MultiGraphicsAllocationTest, 
givenMultiGraphicsAllocationWhenRemovingGraphi EXPECT_EQ(nullptr, multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)); } + +struct MultiGraphicsAllocationTests : ::testing::Test { + + void SetUp() override { + memoryManager = deviceFactory.rootDevices[0]->getMemoryManager(); + } + void TearDown() override { + for (auto &rootDeviceIndex : rootDeviceIndices) { + memoryManager->freeGraphicsMemory(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)); + } + } + + UltDeviceFactory deviceFactory{2, 0}; + MockMultiGraphicsAllocation multiGraphicsAllocation{1}; + std::vector rootDeviceIndices{{0u, 1u}}; + MemoryManager *memoryManager = nullptr; +}; + +TEST_F(MultiGraphicsAllocationTests, whenCreatingMultiGraphicsAllocationWithSharedStorageThenMigrationIsNotRequired) { + + AllocationProperties allocationProperties{0u, + true, //allocateMemory + MemoryConstants::pageSize, + GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, + false, //multiOsContextCapable + false, //isMultiStorageAllocationParam + systemMemoryBitfield}; + + auto ptr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, allocationProperties, multiGraphicsAllocation); + EXPECT_NE(nullptr, ptr); + + EXPECT_EQ(2u, multiGraphicsAllocation.graphicsAllocations.size()); + + EXPECT_NE(nullptr, multiGraphicsAllocation.getGraphicsAllocation(0)->getUnderlyingBuffer()); + EXPECT_EQ(multiGraphicsAllocation.getGraphicsAllocation(0)->getUnderlyingBuffer(), multiGraphicsAllocation.getGraphicsAllocation(1)->getUnderlyingBuffer()); + + EXPECT_FALSE(multiGraphicsAllocation.requiresMigrations()); +} + +TEST_F(MultiGraphicsAllocationTests, whenCreatingMultiGraphicsAllocationWithExistingSystemMemoryThenMigrationIsNotRequired) { + + uint8_t hostPtr[MemoryConstants::pageSize]{}; + + AllocationProperties allocationProperties{0u, + false, //allocateMemory + MemoryConstants::pageSize, + GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, + false, //multiOsContextCapable + 
false, //isMultiStorageAllocationParam + systemMemoryBitfield}; + + multiGraphicsAllocation.addAllocation(memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties, hostPtr)); + EXPECT_FALSE(multiGraphicsAllocation.requiresMigrations()); + + allocationProperties.rootDeviceIndex = 1u; + multiGraphicsAllocation.addAllocation(memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties, hostPtr)); + + EXPECT_FALSE(multiGraphicsAllocation.requiresMigrations()); + + multiGraphicsAllocation.setMultiStorage(false); + EXPECT_FALSE(multiGraphicsAllocation.requiresMigrations()); +} + +TEST_F(MultiGraphicsAllocationTests, whenCreatingMultiGraphicsAllocationWithSeparatedStorageThenMigrationIsRequired) { + AllocationProperties allocationProperties{0u, + true, //allocateMemory + MemoryConstants::pageSize, + GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, + false, //multiOsContextCapable + false, //isMultiStorageAllocationParam + systemMemoryBitfield}; + + multiGraphicsAllocation.addAllocation(memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties)); + EXPECT_FALSE(multiGraphicsAllocation.requiresMigrations()); + + allocationProperties.rootDeviceIndex = 1u; + multiGraphicsAllocation.addAllocation(memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties)); + EXPECT_FALSE(multiGraphicsAllocation.requiresMigrations()); + + multiGraphicsAllocation.setMultiStorage(true); + EXPECT_TRUE(multiGraphicsAllocation.requiresMigrations()); +} + +TEST_F(MultiGraphicsAllocationTests, givenMultiGraphicsAllocationThatRequiresMigrationWhenCopyOrMoveMultiGraphicsAllocationThenTheCopyStillRequiresMigration) { + AllocationProperties allocationProperties{0u, + true, //allocateMemory + MemoryConstants::pageSize, + GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, + false, //multiOsContextCapable + false, //isMultiStorageAllocationParam + systemMemoryBitfield}; + + 
multiGraphicsAllocation.addAllocation(memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties)); + + allocationProperties.rootDeviceIndex = 1u; + multiGraphicsAllocation.addAllocation(memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties)); + + multiGraphicsAllocation.setMultiStorage(true); + EXPECT_TRUE(multiGraphicsAllocation.requiresMigrations()); + EXPECT_EQ(1, multiGraphicsAllocation.migrationSyncData->getRefInternalCount()); + { + + auto copyMultiGraphicsAllocation(multiGraphicsAllocation); + EXPECT_TRUE(copyMultiGraphicsAllocation.requiresMigrations()); + EXPECT_EQ(2, multiGraphicsAllocation.migrationSyncData->getRefInternalCount()); + + auto movedMultiGraphicsAllocation(std::move(copyMultiGraphicsAllocation)); + EXPECT_TRUE(movedMultiGraphicsAllocation.requiresMigrations()); + EXPECT_EQ(2, multiGraphicsAllocation.migrationSyncData->getRefInternalCount()); + } + EXPECT_EQ(1, multiGraphicsAllocation.migrationSyncData->getRefInternalCount()); +} + +struct MigrationSyncDataTests : public MultiGraphicsAllocationTests { + void SetUp() override { + MultiGraphicsAllocationTests::SetUp(); + AllocationProperties allocationProperties{0u, + true, //allocateMemory + MemoryConstants::pageSize, + GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY, + false, //multiOsContextCapable + false, //isMultiStorageAllocationParam + systemMemoryBitfield}; + + multiGraphicsAllocation.addAllocation(memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties)); + + allocationProperties.rootDeviceIndex = 1u; + multiGraphicsAllocation.addAllocation(memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties)); + + multiGraphicsAllocation.setMultiStorage(true); + EXPECT_TRUE(multiGraphicsAllocation.requiresMigrations()); + + migrationSyncData = multiGraphicsAllocation.getMigrationSyncData(); + } + + void TearDown() override { + MultiGraphicsAllocationTests::TearDown(); + } + + MigrationSyncData *migrationSyncData = nullptr; 
+}; + +TEST_F(MigrationSyncDataTests, whenMigrationSyncDataExistsAndSetMultiStorageIsCalledThenReuseSameMigrationSyncData) { + + EXPECT_NE(nullptr, migrationSyncData); + + multiGraphicsAllocation.setMultiStorage(true); + + EXPECT_EQ(migrationSyncData, multiGraphicsAllocation.getMigrationSyncData()); +} + +TEST_F(MigrationSyncDataTests, whenMigrationIsNotStartedThenMigrationIsNotInProgress) { + EXPECT_FALSE(migrationSyncData->isMigrationInProgress()); + + migrationSyncData->startMigration(); + + EXPECT_TRUE(migrationSyncData->isMigrationInProgress()); +} + +TEST_F(MigrationSyncDataTests, whenMigrationIsInProgressThenMultigraphicsAllocationDoesntRequireMigration) { + EXPECT_TRUE(multiGraphicsAllocation.requiresMigrations()); + migrationSyncData->startMigration(); + + EXPECT_TRUE(migrationSyncData->isMigrationInProgress()); + EXPECT_FALSE(multiGraphicsAllocation.requiresMigrations()); +} + +TEST_F(MigrationSyncDataTests, whenSetTargetLocationIsCalledThenProperLocationIsSetAndMigrationIsStopped) { + migrationSyncData->startMigration(); + + EXPECT_TRUE(migrationSyncData->isMigrationInProgress()); + + migrationSyncData->setCurrentLocation(0u); + EXPECT_FALSE(migrationSyncData->isMigrationInProgress()); + EXPECT_EQ(0u, migrationSyncData->getCurrentLocation()); +} + +TEST(MigrationSyncDataTest, whenWaitOnCpuIsCalledThenWaitForValueSpecifiedInSignalUsageMethod) { + auto migrationSyncData = std::make_unique(MemoryConstants::pageSize); + uint32_t tagAddress = 0; + + migrationSyncData->signalUsage(&tagAddress, 2u); + migrationSyncData->waitOnCpu(); + EXPECT_EQ(2u, tagAddress); +} + +TEST(MigrationSyncDataTest, whenTaskCountIsHigherThanExpectedThenWaitOnCpuDoesntHang) { + auto migrationSyncData = std::make_unique(MemoryConstants::pageSize); + uint32_t tagAddress = 5u; + + migrationSyncData->signalUsage(&tagAddress, 2u); + EXPECT_EQ(&tagAddress, migrationSyncData->tagAddress); + EXPECT_EQ(2u, migrationSyncData->latestTaskCountUsed); + + migrationSyncData->waitOnCpu(); + 
EXPECT_EQ(5u, tagAddress); +} + +TEST_F(MigrationSyncDataTests, givenNoSignaledUsageWhenWaitOnCpuIsCalledThenEarlyReturnAndDontCrash) { + EXPECT_NO_THROW(migrationSyncData->waitOnCpu()); + migrationSyncData->signalUsage(nullptr, 2u); + EXPECT_NO_THROW(migrationSyncData->waitOnCpu()); +} + +TEST_F(MigrationSyncDataTests, whenGetHostPtrMethodIsCalledThenAlignedPointerIsReturned) { + auto hostPtr = reinterpret_cast(migrationSyncData->getHostPtr()); + + EXPECT_TRUE(isAligned(hostPtr, MemoryConstants::pageSize)); +}