diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 65acd369a4..63c93f03bb 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -183,7 +183,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::flushRegular } template -ze_result_t CommandListCoreFamilyImmediate::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) { - return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, this->cmdQImmediate); +ze_result_t CommandListCoreFamilyImmediate::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission) { + return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate); } template @@ -427,7 +427,7 @@ inline ze_result_t CommandListCoreFamilyImmediate::executeCommand } NEO::CompletionStamp completionStamp; - if (isCopyOnly()) { + if (cmdQ->peekIsCopyOnlyCommandQueue()) { completionStamp = flushBcsTask(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, csr); } else { completionStamp = (this->*computeFlushMethod)(*commandStream, commandStreamStart, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation); @@ -442,7 +442,7 @@ inline ze_result_t CommandListCoreFamilyImmediate::executeCommand ze_result_t status = ZE_RESULT_SUCCESS; - if (cmdQ == this->cmdQImmediate) { + if (cmdQ == this->cmdQImmediate || cmdQ == this->cmdQImmediateCopyOffload) { cmdQ->setTaskCount(completionStamp.taskCount); if (this->isSyncModeQueue) { @@ -513,7 +513,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernel( CommandListCoreFamily::handleInOrderDependencyCounter(event, true, false); } - return flushImmediate(ret, true, stallingCmdsForRelaxedOrdering, relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, stallingCmdsForRelaxedOrdering, relaxedOrderingDispatch, true, false, hSignalEvent); } template @@ -525,7 +525,7 @@ void CommandListCoreFamilyImmediate::handleInOrderNonWalkerSignal } if (nonWalkerSignalingHasRelaxedOrdering) { - result = flushImmediate(result, true, hasStallingCmds, relaxedOrderingDispatch, true, nullptr); + result = flushImmediate(result, true, hasStallingCmds, relaxedOrderingDispatch, true, false, nullptr); NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers(*this->commandContainer.getCommandStream()); relaxedOrderingDispatch = true; hasStallingCmds = hasStallingCmdsForRelaxedOrdering(1, relaxedOrderingDispatch); @@ -546,7 +546,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernelInd auto ret = CommandListCoreFamily::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent); } template @@ -573,7 +573,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier(ze_even ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); this->dependenciesPresent = true; - return flushImmediate(ret, true, isStallingOperation, relaxedOrderingDispatch, false, hSignalEvent); + return flushImmediate(ret, true, isStallingOperation, relaxedOrderingDispatch, false, false, hSignalEvent); } template @@ -622,7 +622,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( numWaitEvents, phWaitEvents, relaxedOrderingDispatch, forceDisableCopyOnlyInOrderSignaling); } - return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, isCopyOffloadEnabled(), hSignalEvent); } template @@ -679,7 +679,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch, forceDisableCopyOnlyInOrderSignaling); } - return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallindCmds, relaxedOrderingDispatch, true, isCopyOffloadEnabled(), hSignalEvent); } template @@ -694,7 +694,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent); } template @@ -704,7 +704,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_ checkAvailableSpace(0, false, commonImmediateCommandSize); ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent); - return flushImmediate(ret, true, true, false, false, hSignalEvent); + return flushImmediate(ret, true, true, false, false, false, hSignalEvent); } template @@ -714,7 +714,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_e checkAvailableSpace(0, false, commonImmediateCommandSize); ret = CommandListCoreFamily::appendEventReset(hSignalEvent); - return flushImmediate(ret, true, true, false, false, hSignalEvent); + return flushImmediate(ret, true, true, false, false, false, hSignalEvent); } template @@ -744,7 +744,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N } else { ret = CommandListCoreFamily::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost); } - return flushImmediate(ret, false, false, relaxedOrdering, true, nullptr); + return flushImmediate(ret, false, false, relaxedOrdering, true, false, nullptr); } template @@ -769,7 +769,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(ui return ret; } - return flushImmediate(ret, true, true, false, false, nullptr); + return flushImmediate(ret, true, true, false, false, false, nullptr); } template @@ -781,7 +781,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWriteGlobalTime auto ret = CommandListCoreFamily::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); - return flushImmediate(ret, true, true, false, false, hSignalEvent); + return flushImmediate(ret, true, true, false, false, false, hSignalEvent); } template @@ -825,7 +825,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyRegion auto ret = CommandListCoreFamily::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent); } template @@ -843,7 +843,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent); } template @@ -861,7 +861,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent); } template @@ -881,7 +881,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe auto ret = CommandListCoreFamily::appendImageCopyFromMemoryExt(hDstImage, srcPtr, pDstRegion, srcRowPitch, srcSlicePitch, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent); } template @@ -901,7 +901,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo auto ret = CommandListCoreFamily::appendImageCopyToMemoryExt(dstPtr, hSrcImage, pSrcRegion, destRowPitch, destSlicePitch, hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent); } template @@ -914,7 +914,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryRangesBar checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize); auto ret = CommandListCoreFamily::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents); - return flushImmediate(ret, true, true, false, false, hSignalEvent); + return flushImmediate(ret, true, true, false, false, false, hSignalEvent); } template @@ -929,21 +929,21 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchCooperati auto ret = CommandListCoreFamily::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles, relaxedOrderingDispatch); - return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent); + return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, false, hSignalEvent); } template ze_result_t CommandListCoreFamilyImmediate::appendWaitOnMemory(void *desc, void *ptr, uint64_t data, ze_event_handle_t signalEventHandle, bool useQwordData) { checkAvailableSpace(0, false, commonImmediateCommandSize); auto ret = CommandListCoreFamily::appendWaitOnMemory(desc, ptr, data, signalEventHandle, useQwordData); - return flushImmediate(ret, true, false, false, false, signalEventHandle); + return flushImmediate(ret, true, false, false, false, false, signalEventHandle); } template ze_result_t CommandListCoreFamilyImmediate::appendWriteToMemory(void *desc, void *ptr, uint64_t data) { checkAvailableSpace(0, false, commonImmediateCommandSize); auto ret = CommandListCoreFamily::appendWriteToMemory(desc, ptr, data); - return flushImmediate(ret, true, false, false, false, nullptr); + return flushImmediate(ret, true, false, false, false, false, nullptr); } template @@ -1001,15 +1001,17 @@ ze_result_t CommandListCoreFamilyImmediate::hostSynchronize(uint6 template ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, - bool hasRelaxedOrderingDependencies, bool kernelOperation, ze_event_handle_t hSignalEvent) { + bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent) { auto signalEvent = Event::fromHandle(hSignalEvent); + auto queue = copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate; + if (inputRet == ZE_RESULT_SUCCESS) { if (this->isFlushTaskSubmissionEnabled) { if (signalEvent && (NEO::debugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) { - signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate); + signalEvent->setLatestUsedCmdQueue(queue); } - inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation); + inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, copyOffloadSubmission); } else { inputRet = executeCommandListImmediate(performMigration); } @@ -1018,8 +1020,8 @@ ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_res this->latestFlushIsHostVisible = !this->dcFlushSupport; if (signalEvent) { - signalEvent->setCsr(this->csr, isInOrderExecutionEnabled()); - this->latestFlushIsHostVisible |= signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); + signalEvent->setCsr(static_cast(queue)->getCsr(), isInOrderExecutionEnabled()); + this->latestFlushIsHostVisible |= signalEvent->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST) && !copyOffloadSubmission; } return inputRet; diff --git a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h index f95e96b258..03e0239a27 100644 --- a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h @@ -33,6 +33,7 @@ struct InOrderCmdListFixture : public ::Test { using EventImp::signalScope; using EventImp::unsetCmdQueue; using EventImp::externalInterruptId; + using EventImp::latestUsedCmdQueue; void makeCounterBasedInitiallyDisabled() { counterBasedMode = CounterBasedMode::initiallyDisabled; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index d8b2164dd0..ef74e005bf 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -703,10 +703,10 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm return executeCommandListImmediateReturnValue; } - ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation) override { + ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission) override { ++executeCommandListImmediateWithFlushTaskCalledCount; if (callBaseExecute) { - return BaseClass::executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation); + return BaseClass::executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, copyOffloadSubmission); } return executeCommandListImmediateWithFlushTaskReturnValue; } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h index ede54b5f7b..a92d110204 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h @@ -61,6 +61,8 @@ struct Mock : public CommandQueue { Mock(L0::Device *device = nullptr, NEO::CommandStreamReceiver *csr = nullptr, const ze_command_queue_desc_t *desc = &defaultCmdqueueDesc); ~Mock() override; + using CommandQueue::isCopyOnlyCommandQueue; + ADDMETHOD_NOBASE(createFence, ze_result_t, ZE_RESULT_SUCCESS, (const ze_fence_desc_t *desc, ze_fence_handle_t *phFence)); ADDMETHOD_NOBASE(destroy, ze_result_t, ZE_RESULT_SUCCESS, ()); ADDMETHOD_NOBASE(executeCommandLists, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration, NEO::LinearStream *parentImmediateCommandlistLinearStream)); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index aa7b8bdc88..9ed73ee513 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -89,7 +89,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 0; commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 1; commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::RoundRobin; - commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true); + commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true, false); NEO::StateComputeModePropertiesSupport scmPropertiesSupport = {}; productHelper.fillScmPropertiesSupportStructure(scmPropertiesSupport); @@ -116,7 +116,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 0; commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 0; commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::AgeBased; - commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true); + commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true, false); expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 0 : -1; expectedThreadArbitrationPolicy = scmPropertiesSupport.threadArbitrationPolicy ? NEO::ThreadArbitrationPolicy::AgeBased : -1; @@ -143,7 +143,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus auto &commandListImmediate = static_cast &>(*commandList); commandListImmediate.containsAnyKernel = true; - commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true); + commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true, false); EXPECT_FALSE(commandListImmediate.containsAnyKernel); } @@ -158,7 +158,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus auto &commandListImmediate = static_cast &>(*commandList); commandListImmediate.containsAnyKernel = true; - commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true); + commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true, false); EXPECT_FALSE(commandListImmediate.containsAnyKernel); } @@ -173,7 +173,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus auto &commandListImmediate = static_cast &>(*commandList); commandListImmediate.containsAnyKernel = true; - commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true); + commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true, false); EXPECT_FALSE(commandListImmediate.containsAnyKernel); } @@ -188,7 +188,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus auto &commandListImmediate = static_cast &>(*commandList); commandListImmediate.containsAnyKernel = true; - commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true); + commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, true, false); EXPECT_FALSE(commandListImmediate.containsAnyKernel); } @@ -199,7 +199,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, returnValue)); auto &commandListImmediate = static_cast &>(*commandList); - EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false)); + EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false, false)); } HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) { @@ -211,7 +211,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecu auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver(); commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfHostMemory; - EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false)); + EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false, false)); } HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) { @@ -223,7 +223,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExe auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver(); commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfMemory; - EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false)); + EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false, false)); } HWTEST2_F(CommandListExecuteImmediate, GivenImmediateCommandListWhenCommandListIsCreatedThenCsrStateIsNotSet, IsAtLeastSkl) { @@ -494,10 +494,14 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenAppendMemoryRangesBarrie } HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverrideEventCsr, IsAtLeastSkl) { + ze_command_queue_desc_t desc = {}; + auto queue = std::make_unique>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &desc); + MockCommandListImmediateHw cmdList; cmdList.cmdListType = CommandList::CommandListType::typeImmediate; cmdList.initialize(device, NEO::EngineGroupType::copy, 0u); cmdList.commandContainer.setImmediateCmdListCsr(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); + cmdList.cmdQImmediate = queue.get(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; @@ -505,14 +509,15 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); + ze_event_desc_t eventDesc = {}; eventDesc.index = 0; auto event = std::unique_ptr(static_cast(L0::Event::create(eventPool.get(), &eventDesc, device))); - MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield()); cmdList.csr = event->csrs[0]; event->csrs[0] = &mockCommandStreamReceiver; - cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, false, false, event->toHandle()); + cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, false, false, false, event->toHandle()); EXPECT_EQ(event->csrs[0], cmdList.csr); } @@ -1198,7 +1203,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithIndirectAccessWhenExecutingCommandLis auto oldCommandQueue = commandListImmediate.cmdQImmediate; commandListImmediate.cmdQImmediate = &mockCommandQueue; commandListImmediate.indirectAllocationsAllowed = true; - commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false); + commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false, false); EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 1u); commandListImmediate.cmdQImmediate = oldCommandQueue; } @@ -1217,7 +1222,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandL auto oldCommandQueue = commandListImmediate.cmdQImmediate; commandListImmediate.cmdQImmediate = &mockCommandQueue; commandListImmediate.indirectAllocationsAllowed = false; - commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false); + commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false, false); EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 0u); commandListImmediate.cmdQImmediate = oldCommandQueue; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp index d2ecc7a154..c3c30b9bf6 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp @@ -407,6 +407,7 @@ HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCop auto cmdQueue = std::make_unique>(); cmdQueue->csr = ultCsr; + cmdQueue->isCopyOnlyCommandQueue = true; void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); @@ -502,6 +503,8 @@ HWTEST2_F(AppendMemoryCopy, givenSyncImmediateCommandListWhenAppendingMemoryCopy auto cmdQueue = std::make_unique>(); cmdQueue->csr = ultCsr; + cmdQueue->isCopyOnlyCommandQueue = true; + void *srcPtr = reinterpret_cast(0x1234); void *dstPtr = reinterpret_cast(0x2345); @@ -1020,28 +1023,36 @@ HWTEST2_F(AppendMemoryCopyFenceTest, givenDeviceToHostCopyWhenProgrammingThenAdd auto hostVisibleEvent = DestroyableZeUniquePtr(Event::create(eventPool.get(), &eventDescHostVisible, device)); auto regularEvent = DestroyableZeUniquePtr(Event::create(eventPool.get(), &eventDesc, device)); + auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + ze_command_queue_desc_t desc = {}; + MockCommandListCoreFamily cmdListRegular; cmdListRegular.initialize(device, NEO::EngineGroupType::copy, 0u); cmdListRegular.isFlushTaskSubmissionEnabled = true; - cmdListRegular.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + cmdListRegular.csr = csr; MockCommandListCoreFamily cmdListRegularInOrder; cmdListRegularInOrder.initialize(device, NEO::EngineGroupType::copy, 0u); cmdListRegularInOrder.isFlushTaskSubmissionEnabled = true; - cmdListRegularInOrder.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + cmdListRegularInOrder.csr = csr; cmdListRegularInOrder.enableInOrderExecution(); + auto queue1 = std::make_unique>(device, csr, &desc); + auto queue2 = std::make_unique>(device, csr, &desc); + MockCommandListImmediateHw cmdListImmediate; cmdListImmediate.initialize(device, NEO::EngineGroupType::copy, 0u); cmdListImmediate.isFlushTaskSubmissionEnabled = true; - cmdListImmediate.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + cmdListImmediate.csr = csr; cmdListImmediate.cmdListType = CommandList::CommandListType::typeImmediate; + cmdListImmediate.cmdQImmediate = queue1.get(); MockCommandListImmediateHw cmdListImmediateInOrder; cmdListImmediateInOrder.initialize(device, NEO::EngineGroupType::copy, 0u); cmdListImmediateInOrder.isFlushTaskSubmissionEnabled = true; - cmdListImmediateInOrder.csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + cmdListImmediateInOrder.csr = csr; cmdListImmediateInOrder.cmdListType = CommandList::CommandListType::typeImmediate; + cmdListImmediateInOrder.cmdQImmediate = queue2.get(); cmdListImmediateInOrder.enableInOrderExecution(); constexpr size_t allocSize = 1; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp index 0f806eb5f9..a5b386a283 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp @@ -875,7 +875,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForEventFromPreviousAp } } -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForRegularEventFromPreviousAppendThenSkip, IsAtLeastSkl) { +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenWaitingForRegularEventFromPreviousAppendThenSkip, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto immCmdList = createCopyOnlyImmCmdList(); @@ -2004,7 +2004,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent using BaseClass = WhiteBox>; using BaseClass::BaseClass; - ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, ze_event_handle_t hSignalEvent) override { + ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent) override { flushData.push_back(this->cmdListCurrentStartOffset); this->cmdListCurrentStartOffset = this->commandContainer.getCommandStream()->getUsed(); @@ -2119,7 +2119,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenChainingWithRelaxedOrderingT using BaseClass = WhiteBox>; using BaseClass::BaseClass; - ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, ze_event_handle_t hSignalEvent) override { + ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent) override { flushCount++; return ZE_RESULT_SUCCESS; @@ -7135,5 +7135,66 @@ HWTEST2_F(CopyOffloadInOrderTests, givenDeviceToHostCopyWhenProgrammingThenAddFe context->freeMem(deviceBuffer); } +HWTEST2_F(CopyOffloadInOrderTests, whenDispatchingSelectCorrectQueueAndCsr, IsAtLeastXeHpcCore) { + auto regularEventsPool = createEvents(1, false); + + auto immCmdList = createImmCmdListWithOffload(); + + auto regularCsr = static_cast(immCmdList->cmdQImmediate)->getCsr(); + auto copyCsr = static_cast(immCmdList->cmdQImmediateCopyOffload)->getCsr(); + + EXPECT_EQ(0u, regularCsr->peekTaskCount()); + EXPECT_EQ(0u, immCmdList->cmdQImmediate->getTaskCount()); + EXPECT_EQ(0u, copyCsr->peekTaskCount()); + EXPECT_EQ(0u, immCmdList->cmdQImmediateCopyOffload->getTaskCount()); + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, events[0].get(), 0, nullptr, launchParams, false); + + EXPECT_EQ(1u, regularCsr->peekTaskCount()); + EXPECT_EQ(1u, immCmdList->cmdQImmediate->getTaskCount()); + + EXPECT_EQ(0u, copyCsr->peekTaskCount()); + EXPECT_EQ(0u, immCmdList->cmdQImmediateCopyOffload->getTaskCount()); + + EXPECT_EQ(regularCsr, events[0]->csrs[0]); + EXPECT_EQ(immCmdList->cmdQImmediate, events[0]->latestUsedCmdQueue); + + immCmdList->appendMemoryCopy(©Data, ©Data, 1, events[0].get(), 0, nullptr, false, false); + + EXPECT_EQ(1u, regularCsr->peekTaskCount()); + EXPECT_EQ(1u, immCmdList->cmdQImmediate->getTaskCount()); + + EXPECT_EQ(1u, copyCsr->peekTaskCount()); + EXPECT_EQ(1u, immCmdList->cmdQImmediateCopyOffload->getTaskCount()); + + EXPECT_EQ(copyCsr, events[0]->csrs[0]); + EXPECT_EQ(immCmdList->cmdQImmediateCopyOffload, events[0]->latestUsedCmdQueue); +} + +HWTEST2_F(CopyOffloadInOrderTests, givenCopyOperationWithHostVisibleEventThenMarkAsNotHostVisibleSubmission, IsAtLeastXeHpcCore) { + ze_result_t result = ZE_RESULT_SUCCESS; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + ze_event_desc_t eventDescHostVisible = {}; + eventDescHostVisible.signal = ZE_EVENT_SCOPE_FLAG_HOST; + + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + + auto hostVisibleEvent = DestroyableZeUniquePtr(Event::create(eventPool.get(), &eventDescHostVisible, device)); + + auto immCmdList = createImmCmdListWithOffload(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, hostVisibleEvent.get(), 0, nullptr, launchParams, false); + + EXPECT_TRUE(immCmdList->latestFlushIsHostVisible); + + immCmdList->appendMemoryCopy(©Data, ©Data, 1, hostVisibleEvent.get(), 0, nullptr, false, false); + + EXPECT_EQ(!immCmdList->dcFlushSupport, immCmdList->latestFlushIsHostVisible); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp index 8868b73ada..f408438d1c 100644 --- a/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp @@ -249,7 +249,7 @@ HWTEST2_F(CommandListStatePrefetchXeHpcCore, givenForceMemoryPrefetchForKmdMigra std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::renderCompute, returnValue)); auto &commandListImmediate = static_cast &>(*commandList); - result = commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false); + result = commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false, false, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto mockMemoryManager = reinterpret_cast(neoDevice->getMemoryManager());