Mirror of https://github.com/intel/compute-runtime.git (synced 2026-01-03 14:55:24 +08:00)
Add mechanism to migrate multi root device memory

Invalidate TLB cache if kernel requires migration.

Related-To: NEO-3691
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
Committed by: Compute-Runtime-Automation
parent 714a1ebf53
commit 6f3c89decb
@@ -35,6 +35,7 @@
 #include "opencl/source/helpers/task_information.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/mem_obj/image.h"
+#include "opencl/source/memory_manager/migration_controller.h"
 #include "opencl/source/program/block_kernel_manager.h"
 #include "opencl/source/program/printf_handler.h"
@@ -289,6 +290,14 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
     const EnqueueProperties enqueueProperties(false, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType),
                                               flushDependenciesForNonKernelCommand, isMarkerWithProfiling, &blitPropertiesContainer);
 
+    bool migratedMemory = false;
+
+    if (!blockQueue && multiDispatchInfo.peekMainKernel() && multiDispatchInfo.peekMainKernel()->requiresMemoryMigration()) {
+        for (auto &arg : multiDispatchInfo.peekMainKernel()->getMemObjectsToMigrate()) {
+            MigrationController::handleMigration(*this->context, getGpgpuCommandStreamReceiver(), arg.second);
+            migratedMemory = true;
+        }
+    }
     if (!blockQueue) {
         if (parentKernel) {
             processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking);
@@ -395,6 +404,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
     if (blocking) {
         waitUntilComplete(blockQueue, (blockQueue ? nullptr : printfHandler.get()));
     }
+    if (migratedMemory) {
+        getGpgpuCommandStreamReceiver().flushBatchedSubmissions();
+    }
 }
 
 template <typename GfxFamily>
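For context, the situation this enqueue path targets looks roughly like the host-side sketch below: one OpenCL context built from two root devices, and a single buffer used by kernels enqueued on queues of both devices, which is what makes requiresMigrations()/requiresMemoryMigration() return true. This is illustrative only and not part of the commit; it uses only standard OpenCL host API calls, and platform, err, kernel, size and gws are assumed to be set up elsewhere.

// Illustrative sketch (not part of the commit): one buffer shared across two root devices.
cl_device_id devices[2]; // e.g. two discrete GPUs exposed as separate root devices
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 2, devices, nullptr);

cl_context ctx = clCreateContext(nullptr, 2, devices, nullptr, nullptr, &err);
cl_command_queue q0 = clCreateCommandQueueWithProperties(ctx, devices[0], nullptr, &err);
cl_command_queue q1 = clCreateCommandQueueWithProperties(ctx, devices[1], nullptr, &err);

cl_mem buf = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size, nullptr, &err);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &buf);

clEnqueueNDRangeKernel(q0, kernel, 1, nullptr, &gws, nullptr, 0, nullptr, nullptr); // data lands on device 0
clEnqueueNDRangeKernel(q1, kernel, 1, nullptr, &gws, nullptr, 0, nullptr, nullptr); // second enqueue needs migration to device 1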
@@ -860,7 +872,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
         usePerDssBackedBuffer,                                //usePerDssBackedBuffer
         kernel->isSingleSubdevicePreferred(),                 //useSingleSubdevice
         useGlobalAtomics,                                     //useGlobalAtomics
-        kernel->areMultipleSubDevicesInContext()              //areMultipleSubDevicesInContext
+        kernel->areMultipleSubDevicesInContext(),             //areMultipleSubDevicesInContext
+        kernel->requiresMemoryMigration()                     //memoryMigrationRequired
     );
 
     dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
@@ -1054,6 +1067,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
         surface->makeResident(getGpgpuCommandStreamReceiver());
     }
 
+    auto rootDeviceIndex = getDevice().getRootDeviceIndex();
     DispatchFlags dispatchFlags(
         {},                                                   //csrDependencies
         &timestampPacketDependencies.barrierNodes,            //barrierTimestampPacketNodes
@@ -1081,7 +1095,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
         false,                                                //usePerDssBackedBuffer
         false,                                                //useSingleSubdevice
         false,                                                //useGlobalAtomics
-        1u);                                                  //numDevicesInContext
+        context->containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
+        false);                                               //memoryMigrationRequired
 
     if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
         eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
@@ -77,7 +77,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
         false,                                                //usePerDssBackedBuffer
         false,                                                //useSingleSubdevice
         false,                                                //useGlobalAtomics
-        1u);                                                  //numDevicesInContext
+        false,                                                //areMultipleSubDevicesInContext
+        false);                                               //memoryMigrationRequired
 
     DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
 
@@ -243,7 +244,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
         kernel->requiresPerDssBackedBuffer(),                                               //usePerDssBackedBuffer
         kernel->isSingleSubdevicePreferred(),                                               //useSingleSubdevice
         kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,   //useGlobalAtomics
-        kernel->areMultipleSubDevicesInContext());                                          //areMultipleSubDevicesInContext
+        kernel->areMultipleSubDevicesInContext(),                                           //areMultipleSubDevicesInContext
+        kernel->requiresMemoryMigration());                                                 //memoryMigrationRequired
 
     if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
         eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
@@ -269,6 +271,12 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
 
     gtpinNotifyPreFlushTask(&commandQueue);
 
+    if (kernel->requiresMemoryMigration()) {
+        for (auto &arg : kernel->getMemObjectsToMigrate()) {
+            MigrationController::handleMigration(commandQueue.getContext(), commandStreamReceiver, arg.second);
+        }
+    }
+
     completionStamp = commandStreamReceiver.flushTask(*kernelOperation->commandStream,
                                                       0,
                                                       *dsh,
@@ -351,34 +359,36 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
         }
     }
 
+    auto rootDeviceIndex = commandStreamReceiver.getRootDeviceIndex();
     DispatchFlags dispatchFlags(
-        {},                                                     //csrDependencies
-        barrierNodes,                                           //barrierTimestampPacketNodes
-        {},                                                     //pipelineSelectArgs
-        commandQueue.flushStamp->getStampReference(),           //flushStampReference
-        commandQueue.getThrottle(),                             //throttle
-        commandQueue.getDevice().getPreemptionMode(),           //preemptionMode
-        GrfConfig::NotApplicable,                               //numGrfRequired
-        L3CachingSettings::NotApplicable,                       //l3CacheSettings
-        ThreadArbitrationPolicy::NotPresent,                    //threadArbitrationPolicy
-        AdditionalKernelExecInfo::NotApplicable,                //additionalKernelExecInfo
-        KernelExecutionType::NotApplicable,                     //kernelExecutionType
-        MemoryCompressionState::NotApplicable,                  //memoryCompressionState
-        commandQueue.getSliceCount(),                           //sliceCount
-        true,                                                   //blocking
-        false,                                                  //dcFlush
-        false,                                                  //useSLM
-        true,                                                   //guardCommandBufferWithPipeControl
-        false,                                                  //GSBA32BitRequired
-        false,                                                  //requiresCoherency
-        commandQueue.getPriority() == QueuePriority::LOW,       //lowPriority
-        false,                                                  //implicitFlush
-        commandStreamReceiver.isNTo1SubmissionModelEnabled(),   //outOfOrderExecutionAllowed
-        false,                                                  //epilogueRequired
-        false,                                                  //usePerDssBackedBuffer
-        false,                                                  //useSingleSubdevice
-        false,                                                  //useGlobalAtomics
-        1u);                                                    //numDevicesInContext
+        {},                                                     //csrDependencies
+        barrierNodes,                                           //barrierTimestampPacketNodes
+        {},                                                     //pipelineSelectArgs
+        commandQueue.flushStamp->getStampReference(),           //flushStampReference
+        commandQueue.getThrottle(),                             //throttle
+        commandQueue.getDevice().getPreemptionMode(),           //preemptionMode
+        GrfConfig::NotApplicable,                               //numGrfRequired
+        L3CachingSettings::NotApplicable,                       //l3CacheSettings
+        ThreadArbitrationPolicy::NotPresent,                    //threadArbitrationPolicy
+        AdditionalKernelExecInfo::NotApplicable,                //additionalKernelExecInfo
+        KernelExecutionType::NotApplicable,                     //kernelExecutionType
+        MemoryCompressionState::NotApplicable,                  //memoryCompressionState
+        commandQueue.getSliceCount(),                           //sliceCount
+        true,                                                   //blocking
+        false,                                                  //dcFlush
+        false,                                                  //useSLM
+        true,                                                   //guardCommandBufferWithPipeControl
+        false,                                                  //GSBA32BitRequired
+        false,                                                  //requiresCoherency
+        commandQueue.getPriority() == QueuePriority::LOW,       //lowPriority
+        false,                                                  //implicitFlush
+        commandStreamReceiver.isNTo1SubmissionModelEnabled(),   //outOfOrderExecutionAllowed
+        false,                                                  //epilogueRequired
+        false,                                                  //usePerDssBackedBuffer
+        false,                                                  //useSingleSubdevice
+        false,                                                  //useGlobalAtomics
+        commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
+        false);                                                 //memoryMigrationRequired
 
     UNRECOVERABLE_IF(!kernelOperation->blitEnqueue && !commandStreamReceiver.peekTimestampPacketWriteEnabled() && commandQueue.getContext().getRootDeviceIndices().size() == 1);
 
@@ -773,6 +773,20 @@ void Kernel::markArgPatchedAndResolveArgs(uint32_t argIndex) {
         patchedArgumentsNum++;
         kernelArguments[argIndex].isPatched = true;
     }
+    if (program->getContextPtr() && getContext().getRootDeviceIndices().size() > 1u && Kernel::isMemObj(kernelArguments[argIndex].type) && kernelArguments[argIndex].object) {
+        auto argMemObj = castToObjectOrAbort<MemObj>(reinterpret_cast<cl_mem>(kernelArguments[argIndex].object));
+        auto memObj = argMemObj->getHighestRootMemObj();
+        auto migrateRequiredForArg = memObj->getMultiGraphicsAllocation().requiresMigrations();
+
+        if (migratableArgsMap.find(argIndex) == migratableArgsMap.end() && migrateRequiredForArg) {
+            migratableArgsMap.insert({argIndex, memObj});
+        } else if (migrateRequiredForArg) {
+            migratableArgsMap[argIndex] = memObj;
+        } else {
+            migratableArgsMap.erase(argIndex);
+        }
+    }
+
     resolveArgs();
 }
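The net effect of the bookkeeping above is straightforward: for contexts spanning more than one root device, every mem-object kernel argument whose multi graphics allocation reports requiresMigrations() is tracked in migratableArgsMap under its argument index, and re-setting the argument to a non-migratable object drops the entry again. A minimal standalone sketch of that map behaviour (type names simplified, not NEO code):

// Illustrative sketch only; MemObjStub stands in for NEO::MemObj.
#include <cstdint>
#include <map>

struct MemObjStub { bool requiresMigration; };

std::map<uint32_t, MemObjStub *> migratableArgs; // mirrors Kernel::migratableArgsMap

void trackArg(uint32_t argIndex, MemObjStub *memObj) {
    if (memObj->requiresMigration) {
        migratableArgs[argIndex] = memObj; // insert or overwrite
    } else {
        migratableArgs.erase(argIndex);    // argument no longer needs migration
    }
}
// requiresMemoryMigration() then reduces to !migratableArgs.empty().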
@@ -424,6 +424,8 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
     void setMultiDeviceKernel(MultiDeviceKernel *pMultiDeviceKernelToSet) { pMultiDeviceKernel = pMultiDeviceKernelToSet; }
 
     bool areMultipleSubDevicesInContext() const;
+    bool requiresMemoryMigration() const { return migratableArgsMap.size() > 0; }
+    const std::map<uint32_t, MemObj *> &getMemObjectsToMigrate() const { return migratableArgsMap; }
 
   protected:
     struct ObjectCounts {
@@ -549,6 +551,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
 
     std::vector<PatchInfoData> patchInfoDataList;
     std::unique_ptr<ImageTransformer> imageTransformer;
+    std::map<uint32_t, MemObj *> migratableArgsMap{};
 
     bool specialPipelineSelectMode = false;
     bool svmAllocationsRequireCacheFlush = false;
@@ -137,6 +137,7 @@ Image *Image::create(Context *context,
 
     auto &defaultHwHelper = HwHelper::get(context->getDevice(0)->getHardwareInfo().platform.eRenderCoreFamily);
 
+    bool transferedMemory = false;
     do {
         size_t imageWidth = imageDesc->image_width;
         size_t imageHeight = 1;
@@ -239,8 +240,6 @@ Image *Image::create(Context *context,
     AllocationInfoType allocationInfo;
     allocationInfo.resize(maxRootDeviceIndex + 1u);
     bool isParentObject = parentBuffer || parentImage;
-    void *cpuPtr = nullptr;
-    void *hostPtrForced = nullptr;
 
     for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
         allocationInfo[rootDeviceIndex] = {};
@@ -290,7 +289,6 @@ Image *Image::create(Context *context,
                                                                                              false, // allocateMemory
                                                                                              memoryProperties, hwInfo,
                                                                                              context->getDeviceBitfieldForAllocation(rootDeviceIndex));
-                allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1;
 
                 allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtr);
 
@@ -325,39 +323,11 @@ Image *Image::create(Context *context,
                     }
                 }
             } else {
-                if (context->getRootDeviceIndices().size() > 1) {
-                    MemoryProperties memoryPropertiesToSet = memoryProperties;
-                    memoryPropertiesToSet.flags.useHostPtr = true;
-                    memoryPropertiesToSet.flags.copyHostPtr = false;
-
-                    if (cpuPtr) {
-                        AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo,
-                                                                                                                  false, // allocateMemory
-                                                                                                                  const_cast<MemoryProperties &>(memoryPropertiesToSet), hwInfo,
-                                                                                                                  context->getDeviceBitfieldForAllocation(rootDeviceIndex));
-                        allocProperties.flags.crossRootDeviceAccess = true;
-
-                        allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, cpuPtr);
-                    } else {
-                        AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo,
-                                                                                                                  false, // allocateMemory
-                                                                                                                  const_cast<MemoryProperties &>(memoryPropertiesToSet), hwInfo,
-                                                                                                                  context->getDeviceBitfieldForAllocation(rootDeviceIndex));
-                        allocProperties.flags.crossRootDeviceAccess = true;
-                        auto hostPtrForcedSize = alignUp(hostPtrMinSize, MemoryConstants::pageSize);
-                        hostPtrForced = alignedMalloc(hostPtrForcedSize, MemoryConstants::pageSize);
-                        allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties, hostPtrForced);
-                        if (allocationInfo[rootDeviceIndex].memory) {
-                            cpuPtr = reinterpret_cast<void *>(allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer());
-                        }
-                    }
-                } else {
-                    AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo,
-                                                                                                              true, // allocateMemory
-                                                                                                              memoryProperties, hwInfo,
-                                                                                                              context->getDeviceBitfieldForAllocation(rootDeviceIndex));
-                    allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
-                }
+                AllocationProperties allocProperties = MemObjHelper::getAllocationPropertiesWithImageInfo(rootDeviceIndex, imgInfo,
+                                                                                                          true, // allocateMemory
+                                                                                                          memoryProperties, hwInfo,
+                                                                                                          context->getDeviceBitfieldForAllocation(rootDeviceIndex));
+                allocationInfo[rootDeviceIndex].memory = memoryManager->allocateGraphicsMemoryWithProperties(allocProperties);
 
                 if (allocationInfo[rootDeviceIndex].memory && MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool())) {
                     allocationInfo[rootDeviceIndex].zeroCopyAllowed = true;
@@ -368,9 +338,6 @@ Image *Image::create(Context *context,
 
             if (!allocationInfo[rootDeviceIndex].memory) {
                 cleanAllGraphicsAllocations(*context, *memoryManager, allocationInfo, isParentObject);
-                if (hostPtrForced) {
-                    alignedFree(hostPtrForced);
-                }
                 return image;
             }
 
@@ -403,11 +370,10 @@ Image *Image::create(Context *context,
 
         auto defaultRootDeviceIndex = context->getDevice(0u)->getRootDeviceIndex();
 
+        multiGraphicsAllocation.setMultiStorage(context->getRootDeviceIndices().size() > 1);
+
         image = createImageHw(context, memoryProperties, flags, flagsIntel, imgInfo.size, hostPtrToSet, surfaceFormat->OCLImageFormat,
                               imageDescriptor, allocationInfo[defaultRootDeviceIndex].zeroCopyAllowed, std::move(multiGraphicsAllocation), false, 0, 0, surfaceFormat);
-        if (hostPtrForced) {
-            image->setAllocatedMapPtr(hostPtrForced);
-        }
 
         for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
@@ -451,7 +417,7 @@ Image *Image::create(Context *context,
             if (context->isProvidingPerformanceHints() && image->isMemObjZeroCopy()) {
                 context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_IMAGE_MEETS_ALIGNMENT_RESTRICTIONS, static_cast<cl_mem>(image));
             }
-            if (allocationInfo[rootDeviceIndex].transferNeeded) {
+            if (allocationInfo[rootDeviceIndex].transferNeeded && !transferedMemory) {
                 std::array<size_t, 3> copyOrigin = {{0, 0, 0}};
                 std::array<size_t, 3> copyRegion = {{imageWidth, imageHeight, std::max(imageDepth, imageCount)}};
                 if (imageDesc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
@@ -488,6 +454,7 @@ Image *Image::create(Context *context,
                         context->getMemoryManager()->unlockResource(allocationInfo[rootDeviceIndex].memory);
                     }
                 }
+                transferedMemory = true;
             }
 
             if (allocationInfo[rootDeviceIndex].mapAllocation) {
@@ -1548,5 +1515,22 @@ cl_int Image::checkIfDeviceSupportsImages(cl_context context) {
 
     return CL_INVALID_CONTEXT;
 }
+void Image::fillImageRegion(size_t *region) const {
+    region[0] = imageDesc.image_width;
+    if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+        region[1] = imageDesc.image_array_size;
+    } else if (Image::isImage1d(imageDesc)) {
+        region[1] = 1u;
+    } else {
+        region[1] = imageDesc.image_height;
+    }
+
+    if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+        region[2] = imageDesc.image_array_size;
+    } else if (imageDesc.image_type == CL_MEM_OBJECT_IMAGE3D) {
+        region[2] = imageDesc.image_depth;
+    } else {
+        region[2] = 1u;
+    }
+}
 } // namespace NEO
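fillImageRegion produces the {width, height, depth} triple that the read/write image enqueues used during migration expect, for any image type. As a quick worked example of the branches above (the concrete sizes are assumed purely for illustration):

// Illustrative expected results of fillImageRegion (not part of the commit):
//   CL_MEM_OBJECT_IMAGE1D        (width=256)                            -> region = {256, 1, 1}
//   CL_MEM_OBJECT_IMAGE1D_ARRAY  (width=256, array_size=8)              -> region = {256, 8, 1}
//   CL_MEM_OBJECT_IMAGE2D        (width=256, height=128)                -> region = {256, 128, 1}
//   CL_MEM_OBJECT_IMAGE2D_ARRAY  (width=256, height=128, array_size=8)  -> region = {256, 128, 8}
//   CL_MEM_OBJECT_IMAGE3D        (width=256, height=128, depth=4)       -> region = {256, 128, 4}
size_t region[3] = {};
image->fillImageRegion(region); // image is an NEO::Image *; region now covers the whole image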
@@ -199,6 +199,8 @@ class Image : public MemObj {
 
     static cl_int checkIfDeviceSupportsImages(cl_context context);
 
+    void fillImageRegion(size_t *region) const;
+
   protected:
     Image(Context *context,
           const MemoryProperties &memoryProperties,
@@ -151,6 +151,12 @@ class MemObj : public BaseObject<_cl_mem> {
     const cl_mem_flags &getFlagsIntel() const { return flagsIntel; }
     const MultiGraphicsAllocation &getMultiGraphicsAllocation() const { return multiGraphicsAllocation; }
     static void cleanAllGraphicsAllocations(Context &context, MemoryManager &memoryManager, AllocationInfoType &allocationInfo, bool isParentObject);
+    MemObj *getHighestRootMemObj() {
+        if (!associatedMemObject) {
+            return this;
+        }
+        return associatedMemObject->getHighestRootMemObj();
+    }
 
   protected:
     void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);
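getHighestRootMemObj matters for sub-buffers and other derived mem objects: migration state lives on the root object that owns the multi graphics allocation, so the kernel-arg bookkeeping in Kernel::markArgPatchedAndResolveArgs walks up via associatedMemObject first. A hedged host-side illustration follows; the parent/sub-buffer relationship is the standard OpenCL one, while the internal linkage through associatedMemObject is as implied by the recursion above. buf, kernel and err are assumed to exist already.

// Illustrative only: a sub-buffer created from "buf" is a distinct cl_mem, but
// migration decisions are made on its parent (the "highest root" MemObj).
cl_buffer_region subRegion = {0, 4096};
cl_mem subBuf = clCreateSubBuffer(buf, CL_MEM_READ_WRITE,
                                  CL_BUFFER_CREATE_TYPE_REGION, &subRegion, &err);
clSetKernelArg(kernel, 0, sizeof(cl_mem), &subBuf); // tracked via the parent of subBuf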
@@ -9,6 +9,8 @@ set(RUNTIME_SRCS_MEMORY_MANAGER
     ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/compression_selector_ocl.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/cpu_page_fault_manager_memory_sync.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/mem_obj_surface.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/migration_controller.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/migration_controller.h
     ${CMAKE_CURRENT_SOURCE_DIR}/resource_surface.h
 )
 
opencl/source/memory_manager/migration_controller.cpp (new file, 97 lines)
@@ -0,0 +1,97 @@
/*
 * Copyright (C) 2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/memory_manager/migration_controller.h"

#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/migration_sync_data.h"

#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
#include "opencl/source/mem_obj/image.h"
#include "opencl/source/mem_obj/mem_obj.h"

namespace NEO {
class MemoryManager;
class CommandStreamReceiver;
class MultiGraphicsAllocation;
void MigrationController::handleMigration(Context &context, CommandStreamReceiver &targetCsr, MemObj *memObj) {
    auto memoryManager = targetCsr.getMemoryManager();
    auto targetRootDeviceIndex = targetCsr.getRootDeviceIndex();
    auto migrationSyncData = memObj->getMultiGraphicsAllocation().getMigrationSyncData();
    if (!migrationSyncData->isUsedByTheSameContext(targetCsr.getTagAddress())) {
        migrationSyncData->waitOnCpu();
    }
    if (migrationSyncData->getCurrentLocation() != targetRootDeviceIndex) {
        migrateMemory(context, *memoryManager, memObj, targetRootDeviceIndex);
    }
    migrationSyncData->signalUsage(targetCsr.getTagAddress(), targetCsr.peekTaskCount() + 1);
}

void MigrationController::migrateMemory(Context &context, MemoryManager &memoryManager, MemObj *memObj, uint32_t targetRootDeviceIndex) {
    auto &multiGraphicsAllocation = memObj->getMultiGraphicsAllocation();
    auto migrationSyncData = multiGraphicsAllocation.getMigrationSyncData();

    auto sourceRootDeviceIndex = migrationSyncData->getCurrentLocation();
    if (sourceRootDeviceIndex == std::numeric_limits<uint32_t>::max()) {
        migrationSyncData->setCurrentLocation(targetRootDeviceIndex);
        return;
    }

    migrationSyncData->startMigration();

    auto srcMemory = multiGraphicsAllocation.getGraphicsAllocation(sourceRootDeviceIndex);
    auto dstMemory = multiGraphicsAllocation.getGraphicsAllocation(targetRootDeviceIndex);

    auto size = srcMemory->getUnderlyingBufferSize();
    auto hostPtr = migrationSyncData->getHostPtr();

    if (srcMemory->isAllocationLockable()) {
        auto srcLockPtr = memoryManager.lockResource(srcMemory);
        memcpy_s(hostPtr, size, srcLockPtr, size);
        memoryManager.unlockResource(srcMemory);
    } else {

        auto srcCmdQ = context.getSpecialQueue(sourceRootDeviceIndex);
        if (srcMemory->getAllocationType() == GraphicsAllocation::AllocationType::IMAGE) {
            auto pImage = static_cast<Image *>(memObj);
            size_t origin[3] = {};
            size_t region[3] = {};
            pImage->fillImageRegion(region);

            srcCmdQ->enqueueReadImage(pImage, CL_TRUE, origin, region, pImage->getHostPtrRowPitch(), pImage->getHostPtrSlicePitch(), hostPtr, nullptr, 0, nullptr, nullptr);
        } else {
            auto pBuffer = static_cast<Buffer *>(memObj);
            srcCmdQ->enqueueReadBuffer(pBuffer, CL_TRUE, 0u, pBuffer->getSize(), hostPtr, nullptr, 0, nullptr, nullptr);
        }
        srcCmdQ->finish();
    }

    if (dstMemory->isAllocationLockable()) {
        auto dstLockPtr = memoryManager.lockResource(dstMemory);
        memcpy_s(dstLockPtr, size, hostPtr, size);
        memoryManager.unlockResource(dstMemory);
    } else {

        auto dstCmdQ = context.getSpecialQueue(targetRootDeviceIndex);
        if (dstMemory->getAllocationType() == GraphicsAllocation::AllocationType::IMAGE) {
            auto pImage = static_cast<Image *>(memObj);
            size_t origin[3] = {};
            size_t region[3] = {};
            pImage->fillImageRegion(region);

            dstCmdQ->enqueueWriteImage(pImage, CL_TRUE, origin, region, pImage->getHostPtrRowPitch(), pImage->getHostPtrSlicePitch(), hostPtr, nullptr, 0, nullptr, nullptr);
        } else {
            auto pBuffer = static_cast<Buffer *>(memObj);
            dstCmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0u, pBuffer->getSize(), hostPtr, nullptr, 0, nullptr, nullptr);
        }
        dstCmdQ->finish();
    }
    migrationSyncData->setCurrentLocation(targetRootDeviceIndex);
}
} // namespace NEO
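MigrationController relies on a MigrationSyncData object attached to the MultiGraphicsAllocation (declared in shared/source/memory_manager/migration_sync_data.h, which is not part of this page). Reconstructed from the calls above, the interface it assumes looks roughly like the sketch below; method names come from the code, but the exact signatures in the real header may differ.

// Sketch only, inferred from migration_controller.cpp; not the actual header.
class MigrationSyncDataSketch {
  public:
    bool isUsedByTheSameContext(volatile uint32_t *tagAddress); // was the last user this CSR's context?
    void waitOnCpu();                                           // wait for the previous user to complete
    uint32_t getCurrentLocation() const;                        // root device index currently holding valid data
    void setCurrentLocation(uint32_t rootDeviceIndex);
    void startMigration();                                      // mark a migration as in progress
    void signalUsage(volatile uint32_t *tagAddress, uint32_t taskCount); // record who will use the data next
    void *getHostPtr() const;                                   // staging buffer used for the device-to-device copy
};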
opencl/source/memory_manager/migration_controller.h (new file, 22 lines)
@@ -0,0 +1,22 @@
/*
 * Copyright (C) 2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once
#include <cstdint>

namespace NEO {
class MemoryManager;
class CommandStreamReceiver;
class Context;
class MultiGraphicsAllocation;
class MemObj;
class MigrationController {
  public:
    static void handleMigration(Context &context, CommandStreamReceiver &targetCsr, MemObj *memObj);
    static void migrateMemory(Context &context, MemoryManager &memoryManager, MemObj *memObj, uint32_t targetRootDeviceIndex);
};
} // namespace NEO
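Both entry points are driven from the enqueue paths changed earlier in this commit. A condensed sketch of the call pattern, where kernel, context and commandStreamReceiver stand for the objects already in scope at those call sites:

// Mirrors the enqueueHandler / CommandComputeKernel::submit hunks above (illustrative).
if (kernel->requiresMemoryMigration()) {
    for (auto &arg : kernel->getMemObjectsToMigrate()) {
        // arg.first is the kernel argument index, arg.second the root MemObj to migrate
        MigrationController::handleMigration(context, commandStreamReceiver, arg.second);
    }
}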