diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index ecfce63b97..9474a719b1 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -905,6 +905,10 @@ void CommandQueue::processProperties(const cl_queue_properties *properties) { const auto &engine = getDevice().getEngineGroups()[selectedQueueFamilyIndex].engines[selectedQueueIndex]; auto engineType = engine.getEngineType(); auto engineUsage = engine.getEngineUsage(); + if ((DebugManager.flags.EngineUsageHint.get() != -1) && + (getDevice().tryGetEngine(engineType, static_cast(DebugManager.flags.EngineUsageHint.get())) != nullptr)) { /* Debug override: taken only when the hint is set (!= -1) and tryGetEngine returns a non-null engine of this type for the hinted usage; otherwise the selected engine's own usage is kept. */ + engineUsage = static_cast(DebugManager.flags.EngineUsageHint.get()); + } this->overrideEngine(engineType, engineUsage); this->queueCapabilities = getClDevice().getDeviceInfo().queueFamilyProperties[selectedQueueFamilyIndex].capabilities; this->queueFamilyIndex = selectedQueueFamilyIndex; @@ -929,7 +933,7 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage deferredTimestampPackets = std::make_unique(); isCopyOnly = true; } else { - gpgpuEngine = &device->getEngine(engineType, EngineUsage::Regular); + gpgpuEngine = &device->getEngine(engineType, engineUsage); /* honor the engineUsage argument; this call previously hard-coded EngineUsage::Regular */ } } diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index bdad289f4e..146b46ee0c 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -858,6 +858,28 @@ HWTEST_F(CommandQueueTests, givenMultipleCommandQueuesWhenMarkerIsEmittedThenGra EXPECT_EQ(commandStreamGraphicsAllocation, commandStreamGraphicsAllocation2); } +HWTEST_F(CommandQueueTests, givenEngineUsageHintSetWithInvalidValueWhenCreatingCommandQueueThenReturnSuccess) { + DebugManagerStateRestore restore; + 
DebugManager.flags.EngineUsageHint.set(static_cast(EngineUsage::EngineUsageCount)); /* EngineUsageCount is passed as the invalid hint named in the test title (presumably no engine is registered with this usage - confirm) */ + + auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockContext context(pDevice.get()); + + cl_int retVal = CL_SUCCESS; + cl_queue_properties propertiesCooperativeQueue[] = {CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0}; + + auto pCmdQ = CommandQueue::create( + &context, + pDevice.get(), + propertiesCooperativeQueue, + false, + retVal); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_NE(nullptr, pCmdQ); + EXPECT_EQ(EngineUsage::Regular, pCmdQ->getGpgpuEngine().getEngineUsage()); /* the unusable hint must not change the queue's usage: Regular is expected */ + delete pCmdQ; +} + struct WaitForQueueCompletionTests : public ::testing::Test { template struct MyCmdQueue : public CommandQueueHw { diff --git a/opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp b/opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp index c5eda3a3a9..b7b262f30b 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests_pvc_and_later.cpp @@ -112,6 +112,39 @@ HWTEST2_F(CommandQueuePvcAndLaterTests, givenQueueWithMainBcsIsReleasedWhenNewQu EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, queue2->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS2)->getOsContext().getEngineType()); } +HWTEST2_F(CommandQueuePvcAndLaterTests, givenCooperativeEngineUsageHintAndCcsWhenCreatingCommandQueueThenCreateQueueWithCooperativeEngine, IsAtLeastXeHpcCore) { + DebugManagerStateRestore restore; + DebugManager.flags.EngineUsageHint.set(static_cast(EngineUsage::Cooperative)); /* request Cooperative usage for every queue created in this test */ + + auto hwInfo = *defaultHwInfo; + hwInfo.featureTable.flags.ftrCCSNode = true; + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4; + auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); + auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); + + uint32_t revisions[] = {REVISION_A0, REVISION_B}; + for (auto 
&revision : revisions) { + auto hwRevId = hwInfoConfig.getHwRevIdFromStepping(revision, hwInfo); + hwInfo.platform.usRevId = hwRevId; + if (hwRevId == CommonConstants::invalidStepping || + !hwHelper.isCooperativeEngineSupported(hwInfo)) { + continue; /* skip steppings that map to invalidStepping or for which isCooperativeEngineSupported() is false. NOTE(review): if every revision is skipped, the test passes without executing any expectation. */ + } + + auto pDevice = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + MockContext context(pDevice.get()); + cl_queue_properties propertiesCooperativeQueue[] = {CL_QUEUE_FAMILY_INTEL, 0, CL_QUEUE_INDEX_INTEL, 0, 0}; + propertiesCooperativeQueue[1] = pDevice->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute); + + for (size_t i = 0; i < 4; i++) { /* 4 matches the NumberOfCCSEnabled value configured above */ + propertiesCooperativeQueue[3] = i; /* slot 3 is the value paired with CL_QUEUE_INDEX_INTEL: queue index i within the Compute family */ + auto pCommandQueue = std::make_unique>(&context, pDevice.get(), propertiesCooperativeQueue); + EXPECT_EQ(aub_stream::ENGINE_CCS + i, pCommandQueue->gpgpuEngine->osContext->getEngineType()); + EXPECT_EQ(EngineUsage::Cooperative, pCommandQueue->gpgpuEngine->osContext->getEngineUsage()); + } + } +} + struct BcsCsrSelectionCommandQueueTests : ::testing::Test { void SetUp() override { HardwareInfo hwInfo = *::defaultHwInfo; diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index ab596eec56..ade4628a45 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -359,6 +359,7 @@ ProgramGlobalFenceAsKernelInstructionInEUKernel = -1 DoNotReportTile1BscWaActive = -1 ForceTile0PlacementForTile1ResourcesWaActive = -1 ClosEnabled = -1 +EngineUsageHint = -1 AddStatePrefetchCmdToMemoryPrefetchAPI = -1 UpdateCrossThreadDataSize = 0 ForceBcsEngineIndex = -1 \ No newline at end of file diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index d26e36ce56..102f02134e 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -176,6 +176,7 @@ 
DECLARE_DEBUG_VARIABLE(int32_t, ProgramGlobalFenceAsKernelInstructionInEUKernel, DECLARE_DEBUG_VARIABLE(int32_t, DoNotReportTile1BscWaActive, -1, "-1: default, 0: WA Disabled, 1: WA enabled") DECLARE_DEBUG_VARIABLE(int32_t, ForceTile0PlacementForTile1ResourcesWaActive, -1, "-1: default, 0: WA Disabled, 1: WA enabled") DECLARE_DEBUG_VARIABLE(int32_t, ClosEnabled, -1, "-1: default, 0: disabled, 1: enabled. Enable CLOS based cache reservation") +DECLARE_DEBUG_VARIABLE(int32_t, EngineUsageHint, -1, "-1: default, >=0: engine usage value to use when creating command queue on user selected engine") /* read in CommandQueue::processProperties; the value is cast and applied only when tryGetEngine finds an engine of the selected type with that usage */ DECLARE_DEBUG_VARIABLE(int32_t, ForceBcsEngineIndex, -1, "-1: default, >=0 Copy Engine index") /*LOGGING FLAGS*/