Add bcs support for cross-device migrations of multi-graphics allocations

This commit adds support for cross-device migration to the bcs enqueue path
exercised by CL enqueue read/write, map, and unmap calls, controlled with
the regkey AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.
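
For illustration (not part of the original commit message), a hedged sketch of the kind of host code that exercises this path: a context spanning two root devices, a buffer written through the first device's queue and read back through the second device's queue, so the copy-engine read has to migrate the buffer content across devices. The debug key named above is assumed to be enabled through NEO's usual debug-variable mechanism (for example as an environment variable, with NEOReadDebugKeys=1 additionally required on release builds).

```cpp
// Sketch only: error handling omitted; assumes the platform exposes two GPU root devices
// and that AllocateBuffersInLocalMemoryForMultiRootDeviceContexts is enabled.
#include <CL/cl.h>
#include <vector>

int main() {
    cl_platform_id platform{};
    clGetPlatformIDs(1, &platform, nullptr);

    cl_device_id devices[2]{};
    cl_uint numDevices = 0;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 2, devices, &numDevices);

    // One context over both root devices gives the buffer a multi-graphics allocation.
    cl_context context = clCreateContext(nullptr, numDevices, devices, nullptr, nullptr, nullptr);
    cl_command_queue queue0 = clCreateCommandQueueWithProperties(context, devices[0], nullptr, nullptr);
    cl_command_queue queue1 = clCreateCommandQueueWithProperties(context, devices[1], nullptr, nullptr);

    std::vector<char> hostData(4096, 1);
    cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, hostData.size(), nullptr, nullptr);

    // Write through device 0, then read through device 1: the read goes down the bcs
    // enqueue path on device 1 and relies on the cross-device migration added here.
    clEnqueueWriteBuffer(queue0, buffer, CL_TRUE, 0, hostData.size(), hostData.data(), 0, nullptr, nullptr);
    clEnqueueReadBuffer(queue1, buffer, CL_TRUE, 0, hostData.size(), hostData.data(), 0, nullptr, nullptr);

    clReleaseMemObject(buffer);
    clReleaseCommandQueue(queue1);
    clReleaseCommandQueue(queue0);
    clReleaseContext(context);
    return 0;
}
```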

Related-To: NEO-7092

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
Author: Milczarek, Slawomir
Committed by: Compute-Runtime-Automation
Date: 2022-12-28 19:37:21 +00:00
Parent: c268e30189
Commit: 1e8169ca3d

6 changed files with 195 additions and 13 deletions


@@ -38,6 +38,7 @@
 #include "opencl/source/helpers/queue_helpers.h"
 #include "opencl/source/mem_obj/buffer.h"
 #include "opencl/source/mem_obj/image.h"
+#include "opencl/source/memory_manager/migration_controller.h"
 #include "opencl/source/program/printf_handler.h"
 
 #include "CL/cl_ext.h"
@@ -982,9 +983,6 @@ bool CommandQueue::queueDependenciesClearRequired() const {
 bool CommandQueue::blitEnqueueAllowed(const CsrSelectionArgs &args) const {
     bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
-    if (this->getContext().getRootDeviceIndices().size() > 1) {
-        blitEnqueueAllowed &= !DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.get();
-    }
     if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
         blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
     }
@@ -1308,4 +1306,20 @@ void CommandQueue::clearLastBcsPackets() {
     }
 }
 
+void CommandQueue::migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr) {
+    if (!DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.get()) {
+        return;
+    }
+    for (auto argMemObj : {operationParams.srcMemObj, operationParams.dstMemObj}) {
+        if (argMemObj) {
+            auto memObj = argMemObj->getHighestRootMemObj();
+            auto migrateRequiredForArg = memObj->getMultiGraphicsAllocation().requiresMigrations();
+            if (migrateRequiredForArg) {
+                MigrationController::handleMigration(*this->context, csr, memObj);
+            }
+        }
+    }
+}
+
 } // namespace NEO
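
The new hook walks from each blit argument up to its highest root mem object and asks the multi-graphics allocation whether the valid copy currently lives on another root device, delegating the actual transfer to the existing MigrationController. A minimal, self-contained sketch of that idea follows; MultiCopyBuffer and its members are illustrative stand-ins, not NEO's real classes:

```cpp
#include <cstdint>
#include <vector>

// Illustrative stand-ins only, not NEO's real classes: each root device keeps its own
// backing copy of a buffer, a location marker records which copy holds the valid
// content, and migration copies that content to the device that is about to use it.
struct MultiCopyBuffer {
    static constexpr uint32_t locationUndefined = ~0u;

    std::vector<std::vector<uint8_t>> copies; // one backing copy per root device
    uint32_t currentLocation = locationUndefined;

    MultiCopyBuffer(size_t rootDeviceCount, size_t sizeInBytes)
        : copies(rootDeviceCount, std::vector<uint8_t>(sizeInBytes)) {}

    bool requiresMigration(uint32_t targetRootDevice) const {
        return currentLocation != locationUndefined && currentLocation != targetRootDevice;
    }

    void handleMigration(uint32_t targetRootDevice) {
        if (requiresMigration(targetRootDevice)) {
            copies[targetRootDevice] = copies[currentLocation]; // move the valid content over
        }
        currentLocation = targetRootDevice; // the target device now owns the valid copy
    }
};
```

A queue about to blit the buffer on root device 1 would call handleMigration(1) before submitting, which is the role the new migrateMultiGraphicsAllocationsIfRequired hook plays for the real allocations.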


@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -382,6 +382,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
     void providePerformanceHint(TransferProperties &transferProperties);
     bool queueDependenciesClearRequired() const;
     bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
+    MOCKABLE_VIRTUAL void migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr);
 
     inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) const {
         return (commandType == CL_COMMAND_READ_BUFFER ||
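
The declaration is MOCKABLE_VIRTUAL so that unit tests can override the hook on a mock queue and verify when it is invoked. A minimal, self-contained sketch of that pattern, with hypothetical class names and a simplified signature rather than NEO's actual test mocks:

```cpp
#include <cstddef>

// Hypothetical names for illustration, not NEO's actual mocks. NEO's test targets are
// built with MOCKABLE_VIRTUAL expanding to "virtual" (and to nothing in release builds),
// which lets a mock queue override the new hook and record that it ran.
#ifndef MOCKABLE_VIRTUAL
#define MOCKABLE_VIRTUAL virtual // test-build expansion assumed for this sketch
#endif

struct FakeCommandQueue {
    virtual ~FakeCommandQueue() = default;
    MOCKABLE_VIRTUAL void migrateMultiGraphicsAllocationsIfRequired() {
        // production implementation would trigger the migration here
    }
};

struct MockCommandQueue : FakeCommandQueue {
    size_t migrateCalledCount = 0;
    void migrateMultiGraphicsAllocationsIfRequired() override {
        ++migrateCalledCount; // a test can assert this after an enqueue
    }
};
```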


@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -1288,6 +1288,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
     }
     processBarrierTimestampForBcsEngine(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies);
 
+    if (!blockQueue && this->getContext().getRootDeviceIndices().size() > 1) {
+        migrateMultiGraphicsAllocationsIfRequired(multiDispatchInfo.peekBuiltinOpParams(), bcsCsr);
+    }
+
     auto gpgpuSubmission = isGpgpuSubmissionForBcsRequired(blockQueue, timestampPacketDependencies);
     if (isCacheFlushForBcsRequired() && gpgpuSubmission) {
         timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());