diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index 498b9c1113..bf8d65b9ed 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -93,7 +93,7 @@ struct BcsSplit { engineCount--; } - cmdList->addEventsToCmdList(static_cast(this->cmdQs.size()), eventHandles.data(), true); + cmdList->addEventsToCmdList(static_cast(this->cmdQs.size()), eventHandles.data(), hasRelaxedOrderingDependencies); cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], false, true); if (hSignalEvent) { diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp index 175ed21af5..5d192949cb 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdqueue_xe_hpc_core.cpp @@ -556,6 +556,75 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC context->freeMem(dstPtr); } +HWTEST2_F(CommandQueueCommandsXeHpc, givenRelaxedOrderingNotAllowedWhenDispatchSplitThenUseSemaphores, IsXeHpcCore) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + + DebugManagerStateRestore restorer; + DebugManager.flags.SplitBcsCopy.set(1); + + ze_result_t returnValue; + auto hwInfo = *NEO::defaultHwInfo; + hwInfo.featureTable.ftrBcsInfo = 0b111111111; + hwInfo.capabilityTable.blitterOperationsSupported = true; + auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); + auto testL0Device = std::unique_ptr(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue)); + + ze_command_queue_desc_t desc = {}; + desc.ordinal = static_cast(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::Copy)); + + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + testL0Device.get(), + &desc, + false, + NEO::EngineGroupType::Copy, + returnValue)); + ASSERT_NE(nullptr, commandList0); + + constexpr size_t alignment = 4096u; + constexpr size_t size = 8 * MemoryConstants::megaByte; + void *srcPtr; + void *dstPtr; + ze_device_mem_alloc_desc_t deviceDesc = {}; + context->allocDeviceMem(device->toHandle(), + &deviceDesc, + size, alignment, &srcPtr); + ze_host_mem_alloc_desc_t hostDesc = {}; + context->allocHostMem(&hostDesc, size, alignment, &dstPtr); + auto ultCsr = static_cast *>(commandList0->csr); + ultCsr->recordFlusheBatchBuffer = true; + EXPECT_EQ(1u, ultCsr->getNumClients()); + + auto directSubmission = new MockDirectSubmissionHw>(*ultCsr); + ultCsr->directSubmission.reset(directSubmission); + + auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getTaskCount(), 0u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[0])->getCsr()->peekTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[1])->getCsr()->peekTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[2])->getCsr()->peekTaskCount(), 1u); + EXPECT_EQ(static_cast(static_cast(testL0Device.get())->bcsSplit.cmdQs[3])->getCsr()->peekTaskCount(), 1u); + EXPECT_FALSE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + + uint32_t semaphoresFound = 0; + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, commandList0->commandContainer.getCommandStream()->getCpuBase(), commandList0->commandContainer.getCommandStream()->getUsed())); + + for (auto &cmd : cmdList) { + if (genCmdCast(cmd)) { + semaphoresFound++; + } + } + + EXPECT_EQ(4u, semaphoresFound); + + context->freeMem(srcPtr); + context->freeMem(dstPtr); +} + HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsXeHpcCore) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;