fix multi device ocl: add missing tag update flush after blit

When memory is migrated, the tag update must be flushed afterwards.

Related-To: NEO-7552
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2023-04-04 09:51:38 +02:00 committed by Compute-Runtime-Automation
parent a5179aae0b
commit d014a731c5
4 changed files with 18 additions and 7 deletions

View File

@ -1350,16 +1350,19 @@ void CommandQueue::clearLastBcsPackets() {
}
}
void CommandQueue::migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr) {
// Migrates the source and destination memory objects of a builtin operation when required.
//
// For each non-null memory object in operationParams (srcMemObj, dstMemObj) the highest
// root memory object is looked up; if its multi graphics allocation reports that it
// requires migrations, MigrationController::handleMigration is invoked with this queue's
// context, the given csr and that memory object.
//
// Returns true if handleMigration was invoked for at least one memory object,
// false otherwise.
bool CommandQueue::migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr) {
bool migrationHandled = false;
// Check both ends of the operation; either pointer may be null.
for (auto argMemObj : {operationParams.srcMemObj, operationParams.dstMemObj}) {
if (argMemObj) {
// The migration check is performed on the highest root memory object, not on argMemObj itself.
auto memObj = argMemObj->getHighestRootMemObj();
auto migrateRequiredForArg = memObj->getMultiGraphicsAllocation().requiresMigrations();
if (migrateRequiredForArg) {
MigrationController::handleMigration(*this->context, csr, memObj);
// Remember that a migration happened so the caller can react to it.
migrationHandled = true;
}
}
}
return migrationHandled;
}
} // namespace NEO

View File

@ -392,7 +392,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
void providePerformanceHint(TransferProperties &transferProperties);
bool queueDependenciesClearRequired() const;
bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
MOCKABLE_VIRTUAL void migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr);
// Handles migration of the operation's source/destination memory objects if they require it;
// returns true when at least one migration was handled.
MOCKABLE_VIRTUAL bool migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr);
inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) const {
return (commandType == CL_COMMAND_READ_BUFFER ||

View File

@ -1336,6 +1336,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
}
auto blockQueue = false;
bool migratedMemory = false;
TaskCountType taskLevel = 0u;
obtainTaskLevelAndBlockedStatus(taskLevel, eventsRequest.numEventsInWaitList, eventsRequest.eventWaitList, blockQueue, cmdType);
auto clearAllDependencies = queueDependenciesClearRequired();
@ -1368,7 +1369,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
processBarrierTimestampForBcsEngine(bcsCsr.getOsContext().getEngineType(), timestampPacketDependencies);
if (!blockQueue && this->getContext().getRootDeviceIndices().size() > 1) {
migrateMultiGraphicsAllocationsIfRequired(multiDispatchInfo.peekBuiltinOpParams(), bcsCsr);
migratedMemory = migrateMultiGraphicsAllocationsIfRequired(multiDispatchInfo.peekBuiltinOpParams(), bcsCsr);
}
auto gpgpuSubmission = isGpgpuSubmissionForBcsRequired(blockQueue, timestampPacketDependencies);
@ -1449,8 +1450,13 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
commandStreamReceiverOwnership.unlock();
}
queueOwnership.unlock();
bcsCommandStreamReceiverOwnership.unlock();
if (migratedMemory) {
bcsCsr.flushBatchedSubmissions();
bcsCsr.flushTagUpdate();
}
bcsCommandStreamReceiverOwnership.unlock();
if (blocking) {
const auto waitStatus = waitForAllEngines(blockQueue, nullptr);
if (waitStatus == WaitStatus::GpuHang) {

View File

@ -915,11 +915,11 @@ struct BcsCrossDeviceMigrationTests : public ::testing::Test {
public:
MockCmdQToTestMigration(Context *context, ClDevice *device) : CommandQueueHw<FamilyType>(context, device, nullptr, false) {}
void migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr) override {
// Test override: records that the call happened together with the received operation
// parameters and CSR, then forwards to the CommandQueueHw implementation and returns its result.
bool migrateMultiGraphicsAllocationsIfRequired(const BuiltinOpParams &operationParams, CommandStreamReceiver &csr) override {
migrateMultiGraphicsAllocationsIfRequiredCalled = true;
migrateMultiGraphicsAllocationsReceivedOperationParams = operationParams;
migrateMultiGraphicsAllocationsReceivedCsr = &csr;
CommandQueueHw<FamilyType>::migrateMultiGraphicsAllocationsIfRequired(operationParams, csr);
return CommandQueueHw<FamilyType>::migrateMultiGraphicsAllocationsIfRequired(operationParams, csr);
}
bool migrateMultiGraphicsAllocationsIfRequiredCalled = false;
@ -979,6 +979,8 @@ HWTEST_F(BcsCrossDeviceMigrationTests, givenBufferWithMultiStorageWhenEnqueueRea
char hostPtr[size]{};
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQueue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS));
bcsCsr->flushTagUpdateCalled = false;
retVal = cmdQueue->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, size, hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
@ -986,9 +988,9 @@ HWTEST_F(BcsCrossDeviceMigrationTests, givenBufferWithMultiStorageWhenEnqueueRea
EXPECT_TRUE(cmdQueue->migrateMultiGraphicsAllocationsIfRequiredCalled);
auto bcsCsr = cmdQueue->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS);
EXPECT_EQ(bcsCsr, cmdQueue->migrateMultiGraphicsAllocationsReceivedCsr);
EXPECT_EQ(targetRootDeviceIndex, bcsCsr->getRootDeviceIndex());
EXPECT_TRUE(bcsCsr->flushTagUpdateCalled);
EXPECT_EQ(buffer.get(), cmdQueue->migrateMultiGraphicsAllocationsReceivedOperationParams.srcMemObj);
}