diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index b8d60b8aae..d29c008ec3 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -257,6 +257,7 @@ struct CommandList : _ze_command_list_handle_t { uint32_t partitionCount = 1; bool isFlushTaskSubmissionEnabled = false; bool isSyncModeQueue = false; + bool isTbxMode = false; bool commandListSLMEnabled = false; bool requiresQueueUncachedMocs = false; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index af0be219af..c83da1f295 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -117,12 +117,7 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO this->partitionCount = static_cast(this->device->getNEODevice()->getDeviceBitfield().count()); } - if (this->cmdListType == CommandListType::TYPE_IMMEDIATE && !isCopyOnly() && !isInternal()) { - const auto &hwInfo = device->getHwInfo(); - this->isFlushTaskSubmissionEnabled = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily).isPlatformFlushTaskEnabled(hwInfo); - if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) { - this->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get(); - } + if (this->isFlushTaskSubmissionEnabled) { commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled); } @@ -149,17 +144,22 @@ ze_result_t CommandListCoreFamily::executeCommandListImmediate(bo this->close(); ze_command_list_handle_t immediateHandle = this->toHandle(); + this->commandContainer.removeDuplicatesFromResidencyContainer(); const auto commandListExecutionResult = this->cmdQImmediate->executeCommandLists(1, &immediateHandle, nullptr, performMigration); if (commandListExecutionResult == ZE_RESULT_ERROR_DEVICE_LOST) { return commandListExecutionResult; } - const auto synchronizationResult = this->cmdQImmediate->synchronize(std::numeric_limits::max()); - if (synchronizationResult == ZE_RESULT_ERROR_DEVICE_LOST) { - return synchronizationResult; - } + if (this->isCopyOnly() && !this->isSyncModeQueue && !this->isTbxMode) { + this->commandContainer.currentLinearStreamStartOffset = this->commandContainer.getCommandStream()->getUsed(); + } else { + const auto synchronizationResult = this->cmdQImmediate->synchronize(std::numeric_limits::max()); + if (synchronizationResult == ZE_RESULT_ERROR_DEVICE_LOST) { + return synchronizationResult; + } - this->reset(); + this->reset(); + } return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 202ad09314..a547c0bba0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -114,6 +114,13 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device commandList->internalUsage = internalUsage; commandList->cmdListType = CommandListType::TYPE_IMMEDIATE; commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS); + if (!(NEO::EngineGroupType::Copy == engineType) && !internalUsage) { + const auto &hwInfo = device->getHwInfo(); + commandList->isFlushTaskSubmissionEnabled = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily).isPlatformFlushTaskEnabled(hwInfo); + if (NEO::DebugManager.flags.EnableFlushTaskSubmission.get() != -1) { + commandList->isFlushTaskSubmissionEnabled = !!NEO::DebugManager.flags.EnableFlushTaskSubmission.get(); + } + } returnValue = commandList->initialize(device, engineType, desc->flags); if (returnValue != ZE_RESULT_SUCCESS) { commandList->destroy(); @@ -130,6 +137,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device commandList->cmdQImmediate = commandQueue; commandList->csr = csr; + commandList->isTbxMode = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX) || (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB); commandList->commandListPreemptionMode = device->getDevicePreemptionMode(); return commandList; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index b919ebc80d..617e96a004 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -369,6 +369,7 @@ ze_result_t CommandQueueHw::executeCommandLists( auto commandList = CommandList::fromHandle(phCommandLists[i]); auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations(); auto cmdBufferCount = cmdBufferAllocations.size(); + bool immediateMode = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false; auto commandListPreemption = commandList->getCommandListPreemptionMode(); if (statePreemption != commandListPreemption) { @@ -413,7 +414,11 @@ ze_result_t CommandQueueHw::executeCommandLists( for (size_t iter = 0; iter < cmdBufferCount; iter++) { auto allocation = cmdBufferAllocations[iter]; - NEO::EncodeBatchBufferStartOrEnd::programBatchBufferStart(&child, allocation->getGpuAddress(), true); + uint64_t startOffset = allocation->getGpuAddress(); + if (immediateMode && (iter == (cmdBufferCount - 1))) { + startOffset = ptrOffset(allocation->getGpuAddress(), commandList->commandContainer.currentLinearStreamStartOffset); + } + NEO::EncodeBatchBufferStartOrEnd::programBatchBufferStart(&child, startOffset, true); } printfFunctionContainer.insert(printfFunctionContainer.end(), diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 9e62a24a1c..1dc32fa428 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1694,7 +1694,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlus EXPECT_NE(cmdList.end(), itor); } -HWTEST_F(CommandListCreate, givenImmediateCommandListWithCopyOnlyWhenSetBarrierThenMiFlushCmdIsNotInsertedInTheCmdContainer) { +HWTEST_F(CommandListCreate, givenImmediateCommandListWithCopyOnlyWhenSetBarrierThenMiFlushCmdIsInsertedInTheCmdContainer) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; @@ -1713,7 +1713,7 @@ HWTEST_F(CommandListCreate, givenImmediateCommandListWithCopyOnlyWhenSetBarrierT cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); + EXPECT_NE(cmdList.end(), itor); } HWTEST_F(CommandListCreate, whenCommandListIsResetThenContainsStatelessUncachedResourceIsSetToFalse) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index e2c17f4a96..3fa5cdd95e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -731,11 +731,11 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueAndCopyOnly cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); - EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); + EXPECT_NE(cmdList.end(), itor); + EXPECT_GT(commandContainer.getCommandStream()->getUsed(), used); } -HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) { +HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAdded) { using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; ze_command_queue_desc_t desc = {}; @@ -763,8 +763,33 @@ HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhe cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); - EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); + EXPECT_NE(cmdList.end(), itor); + EXPECT_GT(commandContainer.getCommandStream()->getUsed(), used); +} + +HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndTbxCsrWithCopyOnlyImmediateCommandListWhenAppendWaitEventsReturnsSuccess) { + using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + commandList->isTbxMode = true; + + MockEvent event, event2; + event.signalScope = 0; + event.waitScope = 0; + event2.waitScope = 0; + ze_event_handle_t events[] = {&event, &event2}; + + auto ret = commandList->appendWaitOnEvents(2, events); + EXPECT_EQ(ZE_RESULT_SUCCESS, ret); } HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueWithCopyOnlyImmediateCommandListCreatedThenSlushTaskSubmissionIsSetToFalse) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 5bef27728e..de6f42ffd5 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -1213,6 +1213,59 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidEventListWhenAppendLaunchCoo EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue); } +using WithinXeHPAndXeHPC = IsWithinGfxCore; +HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAppendingKernelWithImmediateListWithoutFlushTaskThenNewCmdBufferAllocated, WithinXeHPAndXeHPC) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(0); + using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + createKernel(); + + ze_result_t returnValue; + ze_command_queue_desc_t queueDesc = {}; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Compute, returnValue)); + + auto &commandContainer = commandList->commandContainer; + const auto stream = commandContainer.getCommandStream(); + const auto streamCpu = stream->getCpuBase(); + + Vec3 groupCount{1, 1, 1}; + auto sizeLeftInStream = sizeof(MI_BATCH_BUFFER_END); + auto available = stream->getAvailableSpace(); + stream->getSpace(available - sizeLeftInStream); + + const uint32_t threadGroupDimensions[3] = {1, 1, 1}; + + NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ + 0, + device->getNEODevice(), + kernel.get(), + threadGroupDimensions, + PreemptionMode::MidBatch, + 0, + false, + false, + false, + false, + false, + false, + false, + false}; + NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs); + + auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, 0u); + + const auto streamCpu2 = stream->getCpuBase(); + + EXPECT_NE(nullptr, streamCpu2); + EXPECT_NE(streamCpu, streamCpu2); + + EXPECT_EQ(2u, commandContainer.getCmdBufferAllocations().size()); + auto immediateHandle = commandList->toHandle(); + returnValue = commandList->cmdQImmediate->executeCommandLists(1, &immediateHandle, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); +} + HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, IsAtLeastSkl) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp index e9b39c9ce2..21055cf601 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp @@ -220,6 +220,51 @@ HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyWith commandList->cmdQImmediate = nullptr; } +HWTEST2_F(AppendMemoryCopy, givenAsyncImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenSuccessIsReturned, IsAtLeastSkl) { + Mock cmdQueue; + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + auto commandList = std::make_unique>>(); + ASSERT_NE(nullptr, commandList); + ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, ret); + commandList->device = device; + commandList->cmdQImmediate = &cmdQueue; + commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + + auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled); + EXPECT_EQ(0u, cmdQueue.synchronizeCalled); + + commandList->cmdQImmediate = nullptr; +} + +HWTEST2_F(AppendMemoryCopy, givenSyncModeImmediateCommandListWhenAppendingMemoryCopyWithCopyEngineThenSuccessIsReturned, IsAtLeastSkl) { + Mock cmdQueue; + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + auto commandList = std::make_unique>>(); + ASSERT_NE(nullptr, commandList); + ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::Copy, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, ret); + commandList->device = device; + commandList->cmdQImmediate = &cmdQueue; + commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + commandList->isSyncModeQueue = true; + + auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(1u, cmdQueue.executeCommandListsCalled); + EXPECT_EQ(1u, cmdQueue.synchronizeCalled); + + commandList->cmdQImmediate = nullptr; +} + HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, IsAtLeastSkl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index 08789d9ed3..7be04b9f7c 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -286,6 +286,7 @@ void CommandContainer::closeAndAllocateNextCommandBuffer() { auto ptr = commandStream->getSpace(0u); memcpy_s(ptr, bbEndSize, hwHelper.getBatchBufferEndReference(), bbEndSize); allocateNextCommandBuffer(); + currentLinearStreamStartOffset = 0u; } void CommandContainer::prepareBindfulSsh() { diff --git a/shared/source/command_container/cmdcontainer.h b/shared/source/command_container/cmdcontainer.h index 04a77c613d..9200431bbb 100644 --- a/shared/source/command_container/cmdcontainer.h +++ b/shared/source/command_container/cmdcontainer.h @@ -80,6 +80,7 @@ class CommandContainer : public NonCopyableOrMovableClass { uint32_t nextIddInBlock = 0; bool lastPipelineSelectModeRequired = false; bool lastSentUseGlobalAtomics = false; + uint64_t currentLinearStreamStartOffset = 0u; Device *getDevice() const { return device; }