diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index dd556ae252..c57fdd1ca1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -222,7 +222,7 @@ struct CommandListCoreFamily : CommandListImp { bool isCooperative); ze_result_t appendLaunchKernelSplit(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent); ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions); - void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable); + void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative); void clearCommandsToPatch(); void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 321257da7e..ad5998a828 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2149,7 +2149,7 @@ ze_result_t CommandListCoreFamily::prepareIndirectParams(const ze } template -void CommandListCoreFamily::updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable) { +void CommandListCoreFamily::updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; @@ -2158,12 +2158,12 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo); if (!containsAnyKernel) { - requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, false, device->getHwInfo()); + requiredStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, false, hwInfo); finalStreamState = requiredStreamState; containsAnyKernel = true; } - finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, false, hwInfo); + finalStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, false, hwInfo); if (finalStreamState.frontEndState.isDirty()) { auto pVfeStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType); auto pVfeState = new VFE_STATE_TYPE; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index b9f47ebc2c..a37956315a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -120,8 +120,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z this->indirectAllocationsAllowed = true; } - if (!containsAnyKernel) { - containsCooperativeKernelsFlag = isCooperative; + if ((!containsAnyKernel) || NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get()) { + containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || isCooperative); } else if (containsCooperativeKernelsFlag != isCooperative) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } @@ -150,7 +150,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u); } - updateStreamProperties(*kernel, false); + updateStreamProperties(*kernel, false, isCooperative); NEO::EncodeDispatchKernel::encode(commandContainer, reinterpret_cast(pThreadGroupDimensions), diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 9494322ef5..a4bc57aac9 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -188,8 +188,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z kernelDescriptor.kernelMetadata.kernelName.c_str(), 0u); } - if (!containsAnyKernel) { - containsCooperativeKernelsFlag = isCooperative; + if ((!containsAnyKernel) || NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get()) { + containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || isCooperative); } else if (containsCooperativeKernelsFlag != isCooperative) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } @@ -205,7 +205,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z auto isMultiOsContextCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(device->getNEODevice()->getDeviceBitfield(), !isCooperative); - updateStreamProperties(*kernel, isMultiOsContextCapable); + updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative); KernelImp *kernelImp = static_cast(kernel); this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 81ee3d55da..7622a12f6c 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -82,7 +82,8 @@ ze_result_t CommandQueueHw::executeCommandLists( return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE; } - if (commandListsContainCooperativeKernels != commandList->containsCooperativeKernels()) { + if ((commandListsContainCooperativeKernels != commandList->containsCooperativeKernels()) && + (!NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get())) { return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE; } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index c3bc096f2a..9a060dbbc2 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -1068,11 +1068,11 @@ HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenR auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); int32_t expectedDisableOverdispatch = hwHelper.isDisableOverdispatchAvailable(*defaultHwInfo); - pCommandList->updateStreamProperties(kernel, false); + pCommandList->updateStreamProperties(kernel, false, false); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); - pCommandList->updateStreamProperties(kernel, false); + pCommandList->updateStreamProperties(kernel, false, false); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); } @@ -1099,6 +1099,63 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeKernelWhenAppendLaunchC EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag); } +HWTEST2_F(CommandListAppendLaunchKernel, givenAnyCooperativeKernelAndMixingAllowedWhenAppendLaunchCooperativeKernelIsCalledThenCommandListTypeIsProperlySet, SklAndLaterMatcher) { + DebugManagerStateRestore restorer; + DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); + createKernel(); + kernel->setGroupSize(4, 1, 1); + ze_group_count_t groupCount{8, 1, 1}; + auto pCommandList = std::make_unique>>(); + pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + + bool isCooperative = false; + auto result = pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_TRUE(pCommandList->containsAnyKernel); + EXPECT_FALSE(pCommandList->containsCooperativeKernelsFlag); + + isCooperative = true; + result = pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_TRUE(pCommandList->containsAnyKernel); + EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag); + + isCooperative = false; + result = pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_TRUE(pCommandList->containsAnyKernel); + EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag); +} + +HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsAndAllowMixingWhenAppendLaunchCooperativeKernelIsCalledThenReturnSuccess, SklAndLaterMatcher) { + DebugManagerStateRestore restorer; + DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); + Mock<::L0::Kernel> kernel; + auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); + kernel.module = pMockModule.get(); + + kernel.setGroupSize(4, 1, 1); + ze_group_count_t groupCount{8, 1, 1}; + + auto pCommandList = std::make_unique>>(); + pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + bool isCooperative = false; + auto result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + isCooperative = true; + result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + pCommandList = std::make_unique>>(); + pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + isCooperative = true; + result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + isCooperative = false; + result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsWhenAppendLaunchCooperativeKernelIsCalledThenReturnError, SklAndLaterMatcher) { Mock<::L0::Kernel> kernel; auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp index 91e4afa364..b3aa968091 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp @@ -430,6 +430,21 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListsWithCooperativeAndNo auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, result); } + + DebugManagerStateRestore restorer; + DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1); + { + ze_command_list_handle_t commandLists[] = {pCommandListWithCooperativeKernels->toHandle(), + pCommandListWithNonCooperativeKernels->toHandle()}; + auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } + { + ze_command_list_handle_t commandLists[] = {pCommandListWithNonCooperativeKernels->toHandle(), + pCommandListWithCooperativeKernels->toHandle()}; + auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + } pCommandQueue->destroy(); } diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index a9df8b1ab5..fd4794b04f 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -324,4 +324,5 @@ DoNotFreeResources = 0 OverrideGmmResourceUsageField = -1 LogAllocationType = 0 ProgramAdditionalPipeControlBeforeStateComputeModeCommand = 0 -OverrideBufferSuitableForRenderCompression = -1 \ No newline at end of file +OverrideBufferSuitableForRenderCompression = -1 +AllowMixingRegularAndCooperativeKernels = 0 \ No newline at end of file diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index c02ba628de..dced3fc477 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -71,6 +71,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableTimestampEvents, false, "Timestamp info will DECLARE_DEBUG_VARIABLE(bool, EnableResourceTags, false, "Enable resource tagging in GMM") DECLARE_DEBUG_VARIABLE(bool, EnableFlushTaskSubmission, false, "true: driver uses csr flushTask for immediate submissions, false: driver uses legacy executeCommandList path") DECLARE_DEBUG_VARIABLE(bool, DoNotFreeResources, false, "true: driver stops freeing resources") +DECLARE_DEBUG_VARIABLE(bool, AllowMixingRegularAndCooperativeKernels, false, "true: driver allows mixing regular and cooperative kernels in a single command list and in a single execute") DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing") DECLARE_DEBUG_VARIABLE(std::string, LoadBinarySipFromFile, std::string("unk"), "Select binary file to load SIP kernel raw binary") DECLARE_DEBUG_VARIABLE(int64_t, OverrideMultiStoragePlacement, -1, "-1: disable, 0+: tile mask, each bit corresponds to tile")