From 31e9b5e9fa9db747e3b3cd298a9e3600100a3edf Mon Sep 17 00:00:00 2001 From: Mateusz Hoppe Date: Wed, 15 Nov 2023 09:01:43 +0000 Subject: [PATCH] feature: add support for secondary contexts in group Related-To: NEO-7824 Signed-off-by: Mateusz Hoppe --- level_zero/core/source/cmdlist/cmdlist.h | 4 + level_zero/core/source/device/device_imp.cpp | 12 ++ .../sources/cmdlist/test_cmdlist_7.cpp | 42 ++++ .../sources/cmdqueue/test_cmdqueue_3.cpp | 41 ++++ .../sources/device/test_l0_device.cpp | 67 +++++++ opencl/source/cl_device/cl_device.cpp | 4 +- opencl/source/command_queue/command_queue.cpp | 16 +- .../source/command_queue/command_queue_hw.h | 2 + .../compression_aub_tests_xehp_and_later.cpp | 2 +- .../command_queue/command_queue_tests.cpp | 13 +- .../command_stream/command_stream_receiver.h | 4 + ...ream_receiver_simulated_common_hw_base.inl | 6 + .../debug_settings/debug_variables_base.inl | 1 + shared/source/device/device.cpp | 127 +++++++++++++ shared/source/device/device.h | 29 +++ shared/source/helpers/engine_node_helper.cpp | 9 + shared/source/helpers/engine_node_helper.h | 1 + shared/source/helpers/gfx_core_helper.h | 8 + .../source/helpers/gfx_core_helper_base.inl | 13 ++ .../source/memory_manager/memory_manager.cpp | 28 +++ shared/source/memory_manager/memory_manager.h | 4 + shared/source/os_interface/os_context.h | 17 ++ shared/test/common/mocks/mock_device.h | 1 + shared/test/common/test_files/igdrcl.config | 1 + .../unit_test/device/neo_device_tests.cpp | 179 +++++++++++++++++- .../helpers/engine_node_helper_tests.cpp | 7 + .../helpers/gfx_core_helper_tests.cpp | 22 +++ 27 files changed, 652 insertions(+), 8 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 6f612cd6b9..1a9139b92b 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -329,6 +329,10 @@ struct CommandList : _ze_command_list_handle_t { this->csr = newCsr; } + NEO::CommandStreamReceiver *getCsr() const { + return this->csr; + } + bool hasKernelWithAssert() { return kernelWithAssertAppended; } diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 7f4e42617f..cbd60fbead 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1618,8 +1618,19 @@ ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr auto &osContext = (*csr)->getOsContext(); + const NEO::GfxCoreHelper &gfxCoreHelper = neoDevice->getGfxCoreHelper(); + bool secondaryContextsEnabled = gfxCoreHelper.areSecondaryContextsSupported(); + if (neoDevice->isMultiRegularContextSelectionAllowed(osContext.getEngineType(), osContext.getEngineUsage())) { *csr = neoDevice->getNextEngineForMultiRegularContextMode(osContext.getEngineType()).commandStreamReceiver; + } else if (secondaryContextsEnabled && neoDevice->isSecondaryContextEngineType(osContext.getEngineType())) { + NEO::EngineTypeUsage engineTypeUsage; + engineTypeUsage.first = osContext.getEngineType(); + engineTypeUsage.second = NEO::EngineUsage::regular; + auto engine = neoDevice->getSecondaryEngineCsr(index, engineTypeUsage); + if (engine) { + *csr = engine->commandStreamReceiver; + } } } else { auto subDeviceOrdinal = ordinal - numEngineGroups; @@ -1648,6 +1659,7 @@ ze_result_t DeviceImp::getCsrForLowPriority(NEO::CommandStreamReceiver **csr) { return ZE_RESULT_SUCCESS; } } + // if the code falls through, we have no low priority context created by neoDevice. } UNRECOVERABLE_IF(true); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 587ed27139..c4e09b8405 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -1878,5 +1878,47 @@ HWTEST_F(CommandListCreate, givenCommandListWhenRemoveDeallocationContainerDataT cmdContainer.getDeallocationContainer().clear(); } + +TEST(CommandList, givenContextGroupEnabledWhenCreatingImmediateCommandListThenEachCmdListHasDifferentCsr) { + + HardwareInfo hwInfo = *defaultHwInfo; + if (hwInfo.capabilityTable.defaultEngineType != aub_stream::EngineType::ENGINE_CCS) { + GTEST_SKIP(); + } + + DebugManagerStateRestore dbgRestorer; + debugManager.flags.ContextGroupSize.set(5); + + hwInfo.featureTable.flags.ftrCCSNode = true; + hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; + + auto neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); + NEO::DeviceVector devices; + devices.push_back(std::unique_ptr(neoDevice)); + auto driverHandle = std::make_unique>(); + driverHandle->initialize(std::move(devices)); + auto device = driverHandle->devices[0]; + + ze_command_queue_desc_t desc = {}; + desc.ordinal = 0; + desc.index = 0; + ze_command_list_handle_t commandListHandle1, commandListHandle2; + + auto result = device->createCommandListImmediate(&desc, &commandListHandle1); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + result = device->createCommandListImmediate(&desc, &commandListHandle2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto commandList1 = static_cast(L0::CommandList::fromHandle(commandListHandle1)); + auto commandList2 = static_cast(L0::CommandList::fromHandle(commandListHandle2)); + + EXPECT_NE(commandList1->getCsr(), commandList2->getCsr()); + + commandList1->destroy(); + commandList2->destroy(); +} + } // namespace ult } // namespace L0 \ No newline at end of file diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp index 5f3e1479c9..c7479e9501 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_3.cpp @@ -1322,5 +1322,46 @@ HWTEST2_F(CommandQueueTest, givenTwoCommandQueuesUsingOneCsrWhenExecuteCommandLi commandQueue2->destroy(); } +TEST(CommandQueue, givenContextGroupEnabledWhenCreatingCommandQueuesThenEachCmdQHasDifferentCsr) { + + HardwareInfo hwInfo = *defaultHwInfo; + if (hwInfo.capabilityTable.defaultEngineType != aub_stream::EngineType::ENGINE_CCS) { + GTEST_SKIP(); + } + + DebugManagerStateRestore dbgRestorer; + debugManager.flags.ContextGroupSize.set(5); + + hwInfo.featureTable.flags.ftrCCSNode = true; + hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; + + auto neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo); + NEO::DeviceVector devices; + devices.push_back(std::unique_ptr(neoDevice)); + auto driverHandle = std::make_unique>(); + driverHandle->initialize(std::move(devices)); + auto device = driverHandle->devices[0]; + + ze_command_queue_desc_t desc = {}; + desc.ordinal = 0; + desc.index = 0; + ze_command_queue_handle_t commandQueueHandle1, commandQueueHandle2; + + auto result = device->createCommandQueue(&desc, &commandQueueHandle1); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + result = device->createCommandQueue(&desc, &commandQueueHandle2); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto commandQueue1 = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle1)); + auto commandQueue2 = static_cast(L0::CommandQueue::fromHandle(commandQueueHandle2)); + + EXPECT_NE(commandQueue1->getCsr(), commandQueue2->getCsr()); + + commandQueue1->destroy(); + commandQueue2->destroy(); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index 8f183d7184..d09b7ef4dd 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -3865,6 +3865,73 @@ HWTEST_F(DeviceTest, givenCooperativeDispatchSupportedWhenQueryingPropertiesFlag } } +HWTEST_F(DeviceTest, givenContextGroupSupportedWhenGettingLowPriorityCsrThenCorrectCsrAndContextIsReturned) { + struct MockGfxCoreHelper : NEO::GfxCoreHelperHw { + + const EngineInstancesContainer getGpgpuEngineInstances(const RootDeviceEnvironment &rootDeviceEnvironment) const override { + auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); + EngineInstancesContainer engines; + + uint32_t numCcs = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; + + if (hwInfo.featureTable.flags.ftrCCSNode) { + for (uint32_t i = 0; i < numCcs; i++) { + auto engineType = static_cast(i + aub_stream::ENGINE_CCS); + engines.push_back({engineType, EngineUsage::regular}); + } + + engines.push_back({aub_stream::ENGINE_CCS, EngineUsage::lowPriority}); + } + if (hwInfo.featureTable.flags.ftrRcsNode) { + engines.push_back({aub_stream::ENGINE_RCS, EngineUsage::regular}); + } + + return engines; + } + EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override { + if (engineType == aub_stream::ENGINE_RCS) { + return EngineGroupType::renderCompute; + } + if (engineType >= aub_stream::ENGINE_CCS && engineType < (aub_stream::ENGINE_CCS + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled)) { + return EngineGroupType::compute; + } + UNRECOVERABLE_IF(true); + } + }; + + DebugManagerStateRestore restorer; + debugManager.flags.ContextGroupSize.set(8); + + const uint32_t rootDeviceIndex = 0u; + auto hwInfo = *NEO::defaultHwInfo; + hwInfo.featureTable.flags.ftrCCSNode = true; + hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 2; + + MockExecutionEnvironment mockExecutionEnvironment{&hwInfo}; + RAIIGfxCoreHelperFactory raii(*mockExecutionEnvironment.rootDeviceEnvironments[rootDeviceIndex]); + + { + MockExecutionEnvironment *executionEnvironment = new MockExecutionEnvironment{&hwInfo}; + auto *neoMockDevice = NEO::MockDevice::createWithExecutionEnvironment(&hwInfo, executionEnvironment, rootDeviceIndex); + MockDeviceImp deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); + + NEO::CommandStreamReceiver *lowPriorityCsr = nullptr; + auto result = deviceImp.getCsrForLowPriority(&lowPriorityCsr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + ASSERT_NE(nullptr, lowPriorityCsr); + + ASSERT_EQ(8u, neoMockDevice->secondaryEngines[0].engines.size()); + for (int i = 0; i < 8; i++) { + EXPECT_NE(neoMockDevice->secondaryEngines[0].engines[i].osContext, &lowPriorityCsr->getOsContext()); + EXPECT_NE(neoMockDevice->secondaryEngines[0].engines[i].commandStreamReceiver, lowPriorityCsr); + } + + EXPECT_FALSE(lowPriorityCsr->getOsContext().isPartOfContextGroup()); + EXPECT_EQ(nullptr, lowPriorityCsr->getOsContext().getPrimaryContext()); + } +} + TEST(DevicePropertyFlagIsIntegratedTest, givenIntegratedDeviceThenCorrectDevicePropertyFlagSet) { std::unique_ptr> driverHandle; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); diff --git a/opencl/source/cl_device/cl_device.cpp b/opencl/source/cl_device/cl_device.cpp index 53c5358b71..14cda1ae14 100644 --- a/opencl/source/cl_device/cl_device.cpp +++ b/opencl/source/cl_device/cl_device.cpp @@ -137,7 +137,9 @@ ClDevice *ClDevice::getNearestGenericSubDevice(uint32_t deviceId) { bool ClDevice::getDeviceAndHostTimer(uint64_t *deviceTimestamp, uint64_t *hostTimestamp) const { return device.getDeviceAndHostTimer(deviceTimestamp, hostTimestamp); } bool ClDevice::getHostTimer(uint64_t *hostTimestamp) const { return device.getHostTimer(hostTimestamp); } const HardwareInfo &ClDevice::getHardwareInfo() const { return device.getHardwareInfo(); } -EngineControl &ClDevice::getEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) { return device.getEngine(engineType, engineUsage); } +EngineControl &ClDevice::getEngine(aub_stream::EngineType engineType, EngineUsage engineUsage) { + return device.getEngine(engineType, engineUsage); +} EngineControl &ClDevice::getDefaultEngine() { return device.getDefaultEngine(); } EngineControl &ClDevice::getInternalEngine() { return device.getInternalEngine(); } SelectorCopyEngine &ClDevice::getSelectorCopyEngine() { return device.getSelectorCopyEngine(); } diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index f8c2cbe647..33485970bb 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -191,12 +191,22 @@ void CommandQueue::initializeGpgpu() const { auto defaultEngineType = device->getDefaultEngine().getEngineType(); + const GfxCoreHelper &gfxCoreHelper = getDevice().getGfxCoreHelper(); + bool secondaryContextsEnabled = gfxCoreHelper.areSecondaryContextsSupported(); + if (device->getDevice().isMultiRegularContextSelectionAllowed(defaultEngineType, EngineUsage::regular)) { this->gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(defaultEngineType); } else if (assignEngineRoundRobin) { this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue(); } else { - this->gpgpuEngine = &device->getDefaultEngine(); + + if (secondaryContextsEnabled && EngineHelpers::isCcs(defaultEngineType)) { + gpgpuEngine = device->getDevice().getSecondaryEngineCsr(0, {defaultEngineType, EngineUsage::regular}); + } + + if (gpgpuEngine == nullptr) { + this->gpgpuEngine = &device->getDefaultEngine(); + } } this->initializeGpgpuInternals(); @@ -1188,6 +1198,7 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage const bool isEngineCopyOnly = EngineHelper::isCopyOnlyEngineType(engineGroupType); bool multiRegularContextAllowed = device->getDevice().isMultiRegularContextSelectionAllowed(engineType, engineUsage); + bool secondaryContextsEnabled = gfxCoreHelper.areSecondaryContextsSupported(); if (isEngineCopyOnly) { std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); @@ -1208,6 +1219,9 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage } else { if (multiRegularContextAllowed) { gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(engineType); + } else if (secondaryContextsEnabled && EngineHelpers::isCcs(engineType)) { + auto index = EngineHelpers::getCcsIndex(engineType); + gpgpuEngine = device->getDevice().getSecondaryEngineCsr(index, {engineType, engineUsage}); } else { gpgpuEngine = &device->getEngine(engineType, engineUsage); } diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index eb414d3249..2be3ecfbf9 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/preemption.h" +#include "shared/source/device/device.h" #include "shared/source/helpers/engine_control.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/os_interface/os_context.h" @@ -48,6 +49,7 @@ class CommandQueueHw : public CommandQueue { if (clPriority & static_cast(CL_QUEUE_PRIORITY_LOW_KHR)) { priority = QueuePriority::low; this->gpgpuEngine = &device->getNearestGenericSubDevice(0)->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::lowPriority); + } else if (clPriority & static_cast(CL_QUEUE_PRIORITY_MED_KHR)) { priority = QueuePriority::medium; } else if (clPriority & static_cast(CL_QUEUE_PRIORITY_HIGH_KHR)) { diff --git a/opencl/test/unit_test/aub_tests/command_queue/compression_aub_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_queue/compression_aub_tests_xehp_and_later.cpp index bc7f8ef99e..6448c8c142 100644 --- a/opencl/test/unit_test/aub_tests/command_queue/compression_aub_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_queue/compression_aub_tests_xehp_and_later.cpp @@ -261,7 +261,7 @@ void CompressionXeHPAndLater::givenCompressedImageWhenReadingTh event); EXPECT_EQ(CL_SUCCESS, retVal); - allocation = csr->getTemporaryAllocations().peekHead(); + allocation = pCmdQ->getGpgpuCommandStreamReceiver().getTemporaryAllocations().peekHead(); while (allocation && allocation->getUnderlyingBuffer() != destMemory) { allocation = allocation->next; } diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index 5ec6199408..2653c17f42 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -2603,7 +2603,8 @@ HWTEST_F(CommandQueueOnSpecificEngineTests, givenMultipleFamiliesWhenCreatingQue fillProperties(properties, 0, 0); EngineControl &engineCcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_CCS, EngineUsage::regular); MockCommandQueue queueRcs(&context, context.getDevice(0), properties, false); - EXPECT_EQ(&engineCcs, &queueRcs.getGpgpuEngine()); + EXPECT_EQ(engineCcs.osContext, queueRcs.getGpgpuEngine().osContext); + EXPECT_EQ(engineCcs.commandStreamReceiver, queueRcs.getGpgpuEngine().commandStreamReceiver); EXPECT_FALSE(queueRcs.isCopyOnly); EXPECT_TRUE(queueRcs.isQueueFamilySelected()); EXPECT_EQ(properties[1], queueRcs.getQueueFamilyIndex()); @@ -2653,7 +2654,7 @@ HWTEST_F(CommandQueueOnSpecificEngineTests, givenSubDeviceAndMultipleFamiliesWhe fillProperties(properties, 0, 0); EngineControl &engineCcs = context.getDevice(0)->getEngine(aub_stream::ENGINE_CCS, EngineUsage::regular); MockCommandQueue queueRcs(&context, context.getDevice(0), properties, false); - EXPECT_EQ(&engineCcs, &queueRcs.getGpgpuEngine()); + EXPECT_EQ(engineCcs.osContext, queueRcs.getGpgpuEngine().osContext); EXPECT_FALSE(queueRcs.isCopyOnly); EXPECT_TRUE(queueRcs.isQueueFamilySelected()); EXPECT_EQ(properties[1], queueRcs.getQueueFamilyIndex()); @@ -2734,8 +2735,12 @@ HWTEST_F(CommandQueueOnSpecificEngineTests, givenNotInitializedCcsOsContextWhenC const auto rcsFamilyIndex = static_cast(context.getDevice(0)->getDevice().getEngineGroupIndexFromEngineGroupType(EngineGroupType::renderCompute)); fillProperties(properties, rcsFamilyIndex, 0); MockCommandQueueHw queue(&context, context.getDevice(0), properties); - ASSERT_EQ(&osContext, queue.gpgpuEngine->osContext); - EXPECT_TRUE(osContext.isInitialized()); + + if (queue.gpgpuEngine->osContext->getPrimaryContext() == nullptr) { + ASSERT_EQ(&osContext, queue.gpgpuEngine->osContext); + } + + EXPECT_TRUE(queue.gpgpuEngine->osContext->isInitialized()); } struct CommandQueueCreateWithMultipleRegularContextsTests : public CommandQueueOnSpecificEngineTests { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index c8953c0496..b2beb1a8f1 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -443,6 +443,10 @@ class CommandStreamReceiver { void preallocateCommandBuffer(); void preallocateInternalHeap(); + bool isInitialized() const { + return this->resourcesInitialized; + } + protected: void cleanupResources(); void printDeviceIndex(); diff --git a/shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl b/shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl index 045a908558..9c86559492 100644 --- a/shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_simulated_common_hw_base.inl @@ -22,6 +22,7 @@ #include "shared/source/os_interface/os_context.h" #include "aubstream/aub_manager.h" +#include "aubstream/aubstream.h" namespace NEO { @@ -43,6 +44,11 @@ void CommandStreamReceiverSimulatedCommonHw::setupContext(OsContext & uint32_t flags = 0; getCsTraits(engineType).setContextSaveRestoreFlags(flags); + if (osContext.isPartOfContextGroup()) { + constexpr uint32_t contextGroupBit = aub_stream::hardwareContextFlags::contextGroup; + flags |= contextGroupBit; + } + if (debugManager.flags.AppendAubStreamContextFlags.get() != -1) { flags |= static_cast(debugManager.flags.AppendAubStreamContextFlags.get()); } diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 1385245478..78830f1314 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -243,6 +243,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlush, -1, "-1: defaul DECLARE_DEBUG_VARIABLE(int32_t, NumberOfRegularContextsPerEngine, -1, "-1: default, >0: Create more than 1 Regular contexts for the same engine") DECLARE_DEBUG_VARIABLE(int32_t, EnableMultipleRegularContextForBcs, -1, "-1: default, 0: disabled, 1: Use NumberOfRegularContextsPerEngine to create multiple Regular contexts on the same engine") DECLARE_DEBUG_VARIABLE(int32_t, AppendAubStreamContextFlags, -1, "-1: default, >0: Append flags passed during HardwareContext creation.") +DECLARE_DEBUG_VARIABLE(int32_t, ContextGroupSize, -1, "-1: default, 0-1: context group disabled, >1: number of contexts in group.") DECLARE_DEBUG_VARIABLE(int32_t, DisableScratchPages, -1, "-1: default, 0: do not disable scratch pages during VM creations, 1: disable scratch pages during VM creations") DECLARE_DEBUG_VARIABLE(int32_t, OptimizeIoqBarriersHandling, -1, "-1: default, 0: disable, 1: enable. If enabled, dont dispatch stalling commands for IOQ. Instead, inherit TimestampPackets from previous enqueue.") DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionNumber, -1, "Call exit(0) on X submission. >=0: submission count (start from 0)") diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 9594d7e5dc..2702478dd7 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -307,6 +307,48 @@ bool Device::createEngines() { return false; } } + + if (gfxCoreHelper.areSecondaryContextsSupported()) { + + auto &hardwareInfo = this->getHardwareInfo(); + auto engineType = aub_stream::EngineType::ENGINE_CCS; + + if (tryGetEngine(engineType, EngineUsage::regular)) { + auto contextCount = gfxCoreHelper.getContextGroupContextsCount(); + auto highPriorityContextCount = std::min(contextCount / 2, 4u); + + const EngineGroupType engineGroupType = gfxCoreHelper.getEngineGroupType(engineType, EngineUsage::regular, hardwareInfo); + const auto engineGroupIndex = this->getEngineGroupIndexFromEngineGroupType(engineGroupType); + auto &engineGroup = this->getRegularEngineGroups()[engineGroupIndex]; + + secondaryEngines.resize(engineGroup.engines.size()); + + for (uint32_t engineIndex = 0; engineIndex < static_cast(engineGroup.engines.size()); engineIndex++) { + auto primaryEngine = engineGroup.engines[engineIndex]; + + secondaryEngines[engineIndex].regularEnginesTotal = contextCount - highPriorityContextCount; + secondaryEngines[engineIndex].highPriorityEnginesTotal = highPriorityContextCount; + secondaryEngines[engineIndex].regularCounter = 0; + secondaryEngines[engineIndex].highPriorityCounter = 0; + + NEO::EngineTypeUsage engineTypeUsage; + engineTypeUsage.first = primaryEngine.getEngineType(); + + secondaryEngines[engineIndex].engines.push_back(primaryEngine); + + for (uint32_t i = 1; i < contextCount; i++) { + engineTypeUsage.second = EngineUsage::regular; + + if (i >= contextCount - highPriorityContextCount) { + engineTypeUsage.second = EngineUsage::highPriority; + } + createSecondaryEngine(primaryEngine.commandStreamReceiver, engineIndex, engineTypeUsage); + } + + primaryEngine.osContext->setContextGroup(true); + } + } + } return true; } @@ -345,11 +387,13 @@ std::unique_ptr Device::createCommandStreamReceiver() con bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsage) { const auto &hwInfo = getHardwareInfo(); + auto &gfxCoreHelper = getGfxCoreHelper(); const auto engineType = engineTypeUsage.first; const auto engineUsage = engineTypeUsage.second; const auto defaultEngineType = engineInstanced ? this->engineInstancedType : getChosenEngineType(hwInfo); const bool isDefaultEngine = defaultEngineType == engineType && engineUsage == EngineUsage::regular; const bool createAsEngineInstanced = engineInstanced && EngineHelpers::isCcs(engineType); + const bool isPrimaryEngine = gfxCoreHelper.areSecondaryContextsSupported() && EngineHelpers::isCcs(engineType) && engineUsage == EngineUsage::regular; UNRECOVERABLE_IF(EngineHelpers::isBcs(engineType) && !hwInfo.capabilityTable.blitterOperationsSupported); @@ -370,7 +414,10 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, createAsEngineInstanced); auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), engineDescriptor); + osContext->setContextGroup(isPrimaryEngine); + commandStreamReceiver->setupContext(*osContext); + if (osContext->isImmediateContextInitializationEnabled(isDefaultEngine)) { if (!commandStreamReceiver->initializeResources()) { return false; @@ -389,6 +436,7 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa return false; } + bool firstSubmissionDone = false; if (isDefaultEngine) { bool defaultEngineAlreadySet = (allEngines.size() > defaultEngineIndex) && (allEngines[defaultEngineIndex].getEngineType() == engineType); @@ -400,10 +448,15 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa if (SubmissionStatus::success != commandStreamReceiver->initializeDeviceWithFirstSubmission()) { return false; } + firstSubmissionDone = true; } } } + if (isPrimaryEngine && !firstSubmissionDone) { + commandStreamReceiver->initializeDeviceWithFirstSubmission(); + } + if (EngineHelpers::isBcs(engineType) && (defaultBcsEngineIndex == std::numeric_limits::max()) && (engineUsage == EngineUsage::regular)) { defaultBcsEngineIndex = deviceCsrIndex; } @@ -419,6 +472,80 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa return true; } +bool Device::createSecondaryEngine(CommandStreamReceiver *primaryCsr, uint32_t index, EngineTypeUsage engineTypeUsage) { + auto engineUsage = engineTypeUsage.second; + std::unique_ptr commandStreamReceiver = createCommandStreamReceiver(); + if (!commandStreamReceiver) { + return false; + } + + bool internalUsage = (engineUsage == EngineUsage::internal); + if (internalUsage) { + commandStreamReceiver->initializeDefaultsForInternalEngine(); + } + + EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, false); + + auto osContext = executionEnvironment->memoryManager->createAndRegisterSecondaryOsContext(&primaryCsr->getOsContext(), commandStreamReceiver.get(), engineDescriptor); + commandStreamReceiver->setupContext(*osContext); + + EngineControl engine{commandStreamReceiver.get(), osContext}; + secondaryEngines[index].engines.push_back(engine); + + commandStreamReceivers.push_back(std::move(commandStreamReceiver)); + + return true; +} + +EngineControl *Device::getSecondaryEngineCsr(uint32_t engineIndex, EngineTypeUsage engineTypeUsage) { + + if (secondaryEngines.size() == 0 || !EngineHelpers::isCcs(engineTypeUsage.first) || engineIndex >= secondaryEngines.size()) { + return nullptr; + } + + auto secondaryEngineIndex = 0; + if (engineTypeUsage.second == EngineUsage::highPriority) { + secondaryEngineIndex = (secondaryEngines[engineIndex].highPriorityCounter.fetch_add(1)) % (secondaryEngines[engineIndex].highPriorityEnginesTotal); + secondaryEngineIndex += secondaryEngines[engineIndex].regularEnginesTotal; + } else if (engineTypeUsage.second == EngineUsage::regular) { + secondaryEngineIndex = (secondaryEngines[engineIndex].regularCounter.fetch_add(1)) % (secondaryEngines[engineIndex].regularEnginesTotal); + } else { + DEBUG_BREAK_IF(true); + } + + if (secondaryEngineIndex > 0) { + auto commandStreamReceiver = secondaryEngines[engineIndex].engines[secondaryEngineIndex].commandStreamReceiver; + + auto lock = commandStreamReceiver->obtainUniqueOwnership(); + + if (!commandStreamReceiver->isInitialized()) { + + if (commandStreamReceiver->needsPageTableManager()) { + commandStreamReceiver->createPageTableManager(); + } + + EngineDescriptor engineDescriptor(engineTypeUsage, getDeviceBitfield(), preemptionMode, false, false); + + if (!commandStreamReceiver->initializeResources()) { + return nullptr; + } + + if (!commandStreamReceiver->initializeTagAllocation()) { + return nullptr; + } + + if (!commandStreamReceiver->createGlobalFenceAllocation()) { + return nullptr; + } + + if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) { + return nullptr; + } + } + } + return &secondaryEngines[engineIndex].engines[secondaryEngineIndex]; +} + const HardwareInfo &Device::getHardwareInfo() const { return *getRootDeviceEnvironment().getHardwareInfo(); } const DeviceInfo &Device::getDeviceInfo() const { diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 9a133b41a6..a17a760fd4 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -49,6 +49,25 @@ struct EngineGroupT { }; using EngineGroupsT = std::vector; +struct SecondaryContexts { + SecondaryContexts() = default; + SecondaryContexts(SecondaryContexts &&in) { + this->engines = std::move(in.engines); + this->regularCounter = in.regularCounter.load(); + this->highPriorityCounter = in.highPriorityCounter.load(); + this->regularEnginesTotal = in.regularEnginesTotal; + this->highPriorityEnginesTotal = in.highPriorityEnginesTotal; + } + SecondaryContexts(const SecondaryContexts &in) = delete; + SecondaryContexts &operator=(const SecondaryContexts &) = delete; + + EnginesT engines; // vector of secondary EngineControls + std::atomic regularCounter = 0; // Counter used to assign next regular EngineControl + std::atomic highPriorityCounter = 0; // Counter used to assign next highPriority EngineControl + uint32_t regularEnginesTotal; + uint32_t highPriorityEnginesTotal; +}; + struct RTDispatchGlobalsInfo { GraphicsAllocation *rtDispatchGlobalsArray = nullptr; std::vector rtStacks; // per tile @@ -169,6 +188,11 @@ class Device : public ReferenceTrackedObject { return getPreemptionMode() == PreemptionMode::MidThread || getDebugger() != nullptr; } + MOCKABLE_VIRTUAL EngineControl *getSecondaryEngineCsr(uint32_t engineIndex, EngineTypeUsage engineTypeUsage); + bool isSecondaryContextEngineType(aub_stream::EngineType type) { + return EngineHelpers::isCcs(type); + } + std::atomic debugExecutionCounter = 0; protected: @@ -191,6 +215,8 @@ class Device : public ReferenceTrackedObject { void addEngineToEngineGroup(EngineControl &engine); MOCKABLE_VIRTUAL bool createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsage); + MOCKABLE_VIRTUAL bool createSecondaryEngine(CommandStreamReceiver *primaryCsr, uint32_t index, EngineTypeUsage engineTypeUsage); + MOCKABLE_VIRTUAL std::unique_ptr createCommandStreamReceiver() const; MOCKABLE_VIRTUAL SubDevice *createSubDevice(uint32_t subDeviceIndex); MOCKABLE_VIRTUAL SubDevice *createEngineInstancedSubDevice(uint32_t subDeviceIndex, aub_stream::EngineType engineType); @@ -210,6 +236,9 @@ class Device : public ReferenceTrackedObject { std::unique_ptr performanceCounters; std::vector> commandStreamReceivers; EnginesT allEngines; + + std::vector secondaryEngines; + EngineGroupsT regularEngineGroups; std::vector subdevices; diff --git a/shared/source/helpers/engine_node_helper.cpp b/shared/source/helpers/engine_node_helper.cpp index e4c8467450..40e2fe4d82 100644 --- a/shared/source/helpers/engine_node_helper.cpp +++ b/shared/source/helpers/engine_node_helper.cpp @@ -154,6 +154,15 @@ aub_stream::EngineType remapEngineTypeToHwSpecific(aub_stream::EngineType inputT return inputType; } +uint32_t getCcsIndex(aub_stream::EngineType engineType) { + UNRECOVERABLE_IF(!isCcs(engineType)); + if (engineType == aub_stream::ENGINE_CCS) { + return 0; + } else { + return engineType - aub_stream::ENGINE_CCS; + } +} + uint32_t getBcsIndex(aub_stream::EngineType engineType) { UNRECOVERABLE_IF(!isBcs(engineType)); if (engineType == aub_stream::ENGINE_BCS) { diff --git a/shared/source/helpers/engine_node_helper.h b/shared/source/helpers/engine_node_helper.h index bf92961d82..932c7381ea 100644 --- a/shared/source/helpers/engine_node_helper.h +++ b/shared/source/helpers/engine_node_helper.h @@ -52,6 +52,7 @@ bool isBcsVirtualEngineEnabled(aub_stream::EngineType engineType); aub_stream::EngineType getBcsEngineType(const RootDeviceEnvironment &rootDeviceEnvironment, const DeviceBitfield &deviceBitfield, SelectorCopyEngine &selectorCopyEngine, bool internalUsage); void releaseBcsEngineType(aub_stream::EngineType engineType, SelectorCopyEngine &selectorCopyEngine, const RootDeviceEnvironment &rootDeviceEnvironment); aub_stream::EngineType remapEngineTypeToHwSpecific(aub_stream::EngineType inputType, const RootDeviceEnvironment &rootDeviceEnvironment); +uint32_t getCcsIndex(aub_stream::EngineType engineType); uint32_t getBcsIndex(aub_stream::EngineType engineType); aub_stream::EngineType getBcsEngineAtIdx(uint32_t idx); aub_stream::EngineType mapBcsIndexToEngineType(uint32_t index, bool includeMainCopyEngine); diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h index cd7ccc9519..bf7bf42732 100644 --- a/shared/source/helpers/gfx_core_helper.h +++ b/shared/source/helpers/gfx_core_helper.h @@ -172,6 +172,10 @@ class GfxCoreHelper { virtual uint32_t overrideMaxWorkGroupSize(uint32_t maxWG) const = 0; virtual char const *getDefaultDeviceHierarchy() const = 0; static bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo, const ProductHelper &productHelper); + + virtual bool areSecondaryContextsSupported() const = 0; + virtual uint32_t getContextGroupContextsCount() const = 0; + virtual ~GfxCoreHelper() = default; protected: @@ -382,6 +386,10 @@ class GfxCoreHelperHw : public GfxCoreHelper { uint32_t calculateNumThreadsPerThreadGroup(uint32_t simd, uint32_t totalWorkItems, uint32_t grfSize, bool isHwLocalIdGeneration) const override; uint32_t overrideMaxWorkGroupSize(uint32_t maxWG) const override; char const *getDefaultDeviceHierarchy() const override; + + bool areSecondaryContextsSupported() const override; + uint32_t getContextGroupContextsCount() const override; + ~GfxCoreHelperHw() override = default; protected: diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index 7a31688f55..3db5252048 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -717,4 +717,17 @@ uint64_t GfxCoreHelperHw::getGpuTimeStampInNS(uint64_t timeStamp, dou return static_cast(static_cast(timeStamp & timestampMask) * resolution); } +template +bool GfxCoreHelperHw::areSecondaryContextsSupported() const { + return getContextGroupContextsCount() > 1; +} + +template +uint32_t GfxCoreHelperHw::getContextGroupContextsCount() const { + if (debugManager.flags.ContextGroupSize.get() != -1) { + return debugManager.flags.ContextGroupSize.get(); + } + return 0; +} + } // namespace NEO diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 97a30728d0..cdf996116a 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -55,6 +55,7 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu checkIsaPlacementOnceFlags = std::make_unique(rootEnvCount); isaInLocalMemory.resize(rootEnvCount); allRegisteredEngines.resize(rootEnvCount + 1); + secondaryEngines.resize(rootEnvCount + 1); for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < rootEnvCount; ++rootDeviceIndex) { auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]; @@ -85,6 +86,14 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu } MemoryManager::~MemoryManager() { + for (auto &engineContainer : secondaryEngines) { + for (auto &engine : engineContainer) { + engine.osContext->decRefInternal(); + } + engineContainer.clear(); + } + secondaryEngines.clear(); + for (auto &engineContainer : allRegisteredEngines) { for (auto &engine : engineContainer) { engine.osContext->decRefInternal(); @@ -349,6 +358,25 @@ OsContext *MemoryManager::createAndRegisterOsContext(CommandStreamReceiver *comm return osContext; } +OsContext *MemoryManager::createAndRegisterSecondaryOsContext(const OsContext *primaryContext, CommandStreamReceiver *commandStreamReceiver, + const EngineDescriptor &engineDescriptor) { + auto rootDeviceIndex = commandStreamReceiver->getRootDeviceIndex(); + + updateLatestContextIdForRootDevice(rootDeviceIndex); + + auto contextId = primaryContext->getContextId(); + auto osContext = OsContext::create(peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->osInterface.get(), rootDeviceIndex, contextId, engineDescriptor); + osContext->incRefInternal(); + + osContext->setPrimaryContext(primaryContext); + + UNRECOVERABLE_IF(rootDeviceIndex != osContext->getRootDeviceIndex()); + + secondaryEngines[rootDeviceIndex].emplace_back(commandStreamReceiver, osContext); + + return osContext; +} + bool MemoryManager::getAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const void *hostPtr, const StorageInfo &storageInfo) { UNRECOVERABLE_IF(hostPtr == nullptr && !properties.flags.allocateMemory); UNRECOVERABLE_IF(properties.allocationType == AllocationType::unknown); diff --git a/shared/source/memory_manager/memory_manager.h b/shared/source/memory_manager/memory_manager.h index fc64f49bb3..7638a9408b 100644 --- a/shared/source/memory_manager/memory_manager.h +++ b/shared/source/memory_manager/memory_manager.h @@ -214,6 +214,9 @@ class MemoryManager { MOCKABLE_VIRTUAL OsContext *createAndRegisterOsContext(CommandStreamReceiver *commandStreamReceiver, const EngineDescriptor &engineDescriptor); + MOCKABLE_VIRTUAL OsContext *createAndRegisterSecondaryOsContext(const OsContext *primaryContext, CommandStreamReceiver *commandStreamReceiver, + const EngineDescriptor &engineDescriptor); + const EngineControlContainer &getRegisteredEngines(uint32_t rootDeviceIndex) const { return allRegisteredEngines[rootDeviceIndex]; } const MultiDeviceEngineControlContainer &getRegisteredEngines() const { return allRegisteredEngines; } const EngineControl *getRegisteredEngineForCsr(CommandStreamReceiver *commandStreamReceiver); @@ -334,6 +337,7 @@ class MemoryManager { bool supportsMultiStorageResources = true; ExecutionEnvironment &executionEnvironment; MultiDeviceEngineControlContainer allRegisteredEngines; + MultiDeviceEngineControlContainer secondaryEngines; std::unique_ptr hostPtrManager; uint32_t latestContextId = std::numeric_limits::max(); std::map rootDeviceIndexToContextId; // This map will contain initial value of latestContextId for each rootDeviceIndex diff --git a/shared/source/os_interface/os_context.h b/shared/source/os_interface/os_context.h index 5d68eb3965..58aae3f4da 100644 --- a/shared/source/os_interface/os_context.h +++ b/shared/source/os_interface/os_context.h @@ -71,6 +71,20 @@ class OsContext : public ReferenceTrackedObject { return (tlbFlushCounter.load() > lastFlushedTlbFlushCounter.load()); }; + void setPrimaryContext(const OsContext *primary) { + primaryContext = primary; + isContextGroup = true; + } + const OsContext *getPrimaryContext() const { + return primaryContext; + } + void setContextGroup(bool value) { + isContextGroup = value; + } + bool isPartOfContextGroup() { + return isContextGroup; + } + protected: virtual bool initializeContext() { return true; } @@ -92,5 +106,8 @@ class OsContext : public ReferenceTrackedObject { bool debuggableContext = false; bool engineInstancedDevice = false; uint8_t powerHintValue = 0; + + bool isContextGroup = false; + const OsContext *primaryContext = nullptr; }; } // namespace NEO diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index d826bfec72..8e6c5b4535 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -66,6 +66,7 @@ class MockDevice : public RootDevice { using Device::regularEngineGroups; using Device::rootCsrCreated; using Device::rtMemoryBackedBuffer; + using Device::secondaryEngines; using Device::uuid; using RootDevice::createEngines; using RootDevice::defaultEngineIndex; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 5a4c5ecc11..75fd8b6f5f 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -515,6 +515,7 @@ ExperimentalForceCopyThroughLock = -1 NumberOfRegularContextsPerEngine = -1 EnableMultipleRegularContextForBcs = -1 AppendAubStreamContextFlags = -1 +ContextGroupSize=-1 DirectSubmissionRelaxedOrderingMinNumberOfClients = -1 UseDeprecatedClDeviceIpVersion = 0 ExperimentalCopyThroughLockWaitlistSizeThreshold= -1 diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index a5f7a726a7..58598d1307 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -12,6 +12,7 @@ #include "shared/source/memory_manager/gfx_partition.h" #include "shared/source/os_interface/device_factory.h" #include "shared/source/os_interface/driver_info.h" +#include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/product_helper_hw.h" #include "shared/source/release_helper/release_helper.h" @@ -877,7 +878,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, whenDeviceCreatesEnginesThenDeviceIsIn auto device = deviceFactory.rootDevices[0]; auto csr = device->allEngines[device->defaultEngineIndex].commandStreamReceiver; - EXPECT_EQ(device->isInitDeviceWithFirstSubmissionSupported(csr->getType()), csr->peekLatestSentTaskCount()); + + if (device->isInitDeviceWithFirstSubmissionSupported(csr->getType())) { + EXPECT_EQ(1u, csr->peekLatestSentTaskCount()); + } } TEST(FailDeviceTest, GivenFailedDeviceWhenCreatingDeviceThenNullIsReturned) { @@ -1003,3 +1007,176 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenXeHPAndLaterProductWhenRequestedV TEST_F(DeviceTests, whenCheckingPreferredPlatformNameThenNullIsReturned) { EXPECT_EQ(nullptr, defaultHwInfo->capabilityTable.preferredPlatformName); } + +TEST(Device, givenDifferentEngineTypesWhenIsSecondaryContextEngineTypeCalledThenTrueReturnedForCCS) { + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + + EXPECT_TRUE(device->isSecondaryContextEngineType(aub_stream::ENGINE_CCS)); + EXPECT_TRUE(device->isSecondaryContextEngineType(aub_stream::ENGINE_CCS1)); + EXPECT_TRUE(device->isSecondaryContextEngineType(aub_stream::ENGINE_CCS2)); + EXPECT_TRUE(device->isSecondaryContextEngineType(aub_stream::ENGINE_CCS3)); + + EXPECT_FALSE(device->isSecondaryContextEngineType(aub_stream::ENGINE_RCS)); + EXPECT_FALSE(device->isSecondaryContextEngineType(aub_stream::ENGINE_CCCS)); + EXPECT_FALSE(device->isSecondaryContextEngineType(aub_stream::ENGINE_BCS)); +} + +HWCMDTEST_F(IGFX_XE_HP_CORE, DeviceTests, givenCCSEngineAndContextGroupSizeEnabledWhenCreatingEngineThenItsContextHasContextGroupFlagSet) { + DebugManagerStateRestore dbgRestorer; + const uint32_t contextGroupSize = 8; + debugManager.flags.ContextGroupSize.set(contextGroupSize); + + HardwareInfo hwInfo = *defaultHwInfo; + hwInfo.featureTable.flags.ftrRcsNode = false; + hwInfo.featureTable.flags.ftrCCSNode = true; + hwInfo.featureTable.ftrBcsInfo = 0; + hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; + + MockExecutionEnvironment executionEnvironment(&hwInfo, false, 1); + executionEnvironment.incRefInternal(); + + UltDeviceFactory deviceFactory{1, 0, executionEnvironment}; + + deviceFactory.rootDevices[0]->createEngine(0, {aub_stream::EngineType::ENGINE_CCS, EngineUsage::regular}); + + auto defaultEngine = deviceFactory.rootDevices[0]->getDefaultEngine(); + EXPECT_NE(nullptr, &defaultEngine); + + EXPECT_EQ(aub_stream::EngineType::ENGINE_CCS, defaultEngine.getEngineType()); + EXPECT_EQ(EngineUsage::regular, defaultEngine.getEngineUsage()); + + EXPECT_TRUE(defaultEngine.osContext->isPartOfContextGroup()); +} + +HWTEST_F(DeviceTests, givenCCSEnginesAndContextGroupSizeEnabledWhenDeviceIsCreatedThenSecondaryEnginesAreCreated) { + DebugManagerStateRestore dbgRestorer; + const uint32_t contextGroupSize = 8; + debugManager.flags.ContextGroupSize.set(contextGroupSize); + + HardwareInfo hwInfo = *defaultHwInfo; + hwInfo.featureTable.flags.ftrCCSNode = true; + hwInfo.featureTable.ftrBcsInfo = 0; + hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 2; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + auto &engineGroups = device->getRegularEngineGroups(); + + auto engineGroupType = EngineGroupType::compute; + size_t computeEnginesCount = 0; + for (const auto &engine : engineGroups) { + if (engine.engineGroupType == engineGroupType) { + computeEnginesCount = engine.engines.size(); + } + } + + if (computeEnginesCount == 0) { + GTEST_SKIP(); + } + + ASSERT_EQ(computeEnginesCount, device->secondaryEngines.size()); + ASSERT_EQ(contextGroupSize, device->secondaryEngines[0].engines.size()); + + auto defaultEngine = device->getDefaultEngine(); + EXPECT_EQ(defaultEngine.commandStreamReceiver, device->secondaryEngines[0].engines[0].commandStreamReceiver); + + const uint32_t regularContextCount = std::min(contextGroupSize / 2, 4u); + + for (uint32_t ccsIndex = 0; ccsIndex < computeEnginesCount; ccsIndex++) { + EXPECT_TRUE(device->secondaryEngines[ccsIndex].engines[0].osContext->isPartOfContextGroup()); + EXPECT_EQ(nullptr, device->secondaryEngines[ccsIndex].engines[0].osContext->getPrimaryContext()); + + for (size_t i = 1; i < device->secondaryEngines[0].engines.size(); i++) { + EXPECT_EQ(device->secondaryEngines[ccsIndex].engines[0].osContext, device->secondaryEngines[ccsIndex].engines[i].osContext->getPrimaryContext()); + EXPECT_TRUE(device->secondaryEngines[ccsIndex].engines[i].osContext->isPartOfContextGroup()); + } + + EXPECT_EQ(0u, device->secondaryEngines[ccsIndex].regularCounter.load()); + EXPECT_EQ(0u, device->secondaryEngines[ccsIndex].highPriorityCounter.load()); + + EXPECT_EQ(regularContextCount, device->secondaryEngines[ccsIndex].regularEnginesTotal); + EXPECT_EQ(contextGroupSize - regularContextCount, device->secondaryEngines[ccsIndex].highPriorityEnginesTotal); + + for (size_t contextId = 0; contextId < regularContextCount + 1; contextId++) { + auto engine = device->getSecondaryEngineCsr(ccsIndex, {EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::regular}); + ASSERT_NE(nullptr, engine); + + EXPECT_EQ(contextId + 1, device->secondaryEngines[ccsIndex].regularCounter.load()); + if (contextId == regularContextCount) { + EXPECT_EQ(&device->secondaryEngines[ccsIndex].engines[0], engine); + } + } + + for (size_t contextId = 0; contextId < contextGroupSize - regularContextCount + 1; contextId++) { + auto engine = device->getSecondaryEngineCsr(ccsIndex, {EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::highPriority}); + ASSERT_NE(nullptr, engine); + + EXPECT_EQ(contextId + 1, device->secondaryEngines[ccsIndex].highPriorityCounter.load()); + if (contextId == contextGroupSize - regularContextCount) { + EXPECT_EQ(&device->secondaryEngines[ccsIndex].engines[regularContextCount], engine); + } + } + } + + auto internalEngine = device->getInternalEngine(); + EXPECT_NE(internalEngine.commandStreamReceiver, device->getSecondaryEngineCsr(0, {aub_stream::EngineType::ENGINE_CCS, EngineUsage::internal})->commandStreamReceiver); +} + +HWTEST_F(DeviceTests, givenContextGroupEnabledWhenGettingSecondaryEngineThenResourcesAndContextAreInitialized) { + + HardwareInfo hwInfo = *defaultHwInfo; + if (hwInfo.capabilityTable.defaultEngineType != aub_stream::EngineType::ENGINE_CCS) { + GTEST_SKIP(); + } + + DebugManagerStateRestore dbgRestorer; + debugManager.flags.ContextGroupSize.set(5); + + hwInfo.featureTable.flags.ftrCCSNode = true; + hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1; + + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(&hwInfo)); + const auto &gfxCoreHelper = device->getRootDeviceEnvironment().getHelper(); + + const auto ccsIndex = 0; + auto secondaryEnginesCount = device->secondaryEngines[ccsIndex].engines.size(); + ASSERT_EQ(5u, secondaryEnginesCount); + + EXPECT_TRUE(device->secondaryEngines[ccsIndex].engines[0].commandStreamReceiver->isInitialized()); + EXPECT_EQ(1u, device->secondaryEngines[ccsIndex].engines[0].commandStreamReceiver->peekLatestSentTaskCount()); + + for (uint32_t secondaryIndex = 1; secondaryIndex < secondaryEnginesCount; secondaryIndex++) { + + EXPECT_FALSE(device->secondaryEngines[ccsIndex].engines[secondaryIndex].osContext->isInitialized()); + EXPECT_FALSE(device->secondaryEngines[ccsIndex].engines[secondaryIndex].commandStreamReceiver->isInitialized()); + + EXPECT_EQ(nullptr, device->secondaryEngines[ccsIndex].engines[secondaryIndex].commandStreamReceiver->getTagAllocation()); + EXPECT_EQ(nullptr, device->secondaryEngines[ccsIndex].engines[secondaryIndex].commandStreamReceiver->getGlobalFenceAllocation()); + if (device->getPreemptionMode() == PreemptionMode::MidThread) { + EXPECT_EQ(nullptr, device->secondaryEngines[ccsIndex].engines[secondaryIndex].commandStreamReceiver->getPreemptionAllocation()); + } + + device->getSecondaryEngineCsr(ccsIndex, {EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::regular}); + } + + for (uint32_t i = 0; i < device->secondaryEngines[ccsIndex].highPriorityEnginesTotal; i++) { + device->getSecondaryEngineCsr(ccsIndex, {EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::highPriority}); + } + + for (uint32_t secondaryIndex = 0; secondaryIndex < secondaryEnginesCount; secondaryIndex++) { + + EXPECT_TRUE(device->secondaryEngines[ccsIndex].engines[secondaryIndex].osContext->isInitialized()); + EXPECT_TRUE(device->secondaryEngines[ccsIndex].engines[secondaryIndex].commandStreamReceiver->isInitialized()); + + EXPECT_NE(nullptr, device->secondaryEngines[ccsIndex].engines[secondaryIndex].commandStreamReceiver->getTagAllocation()); + + if (gfxCoreHelper.isFenceAllocationRequired(hwInfo)) { + EXPECT_NE(nullptr, device->secondaryEngines[ccsIndex].engines[secondaryIndex].commandStreamReceiver->getGlobalFenceAllocation()); + } + if (device->getPreemptionMode() == PreemptionMode::MidThread) { + EXPECT_NE(nullptr, device->secondaryEngines[ccsIndex].engines[secondaryIndex].commandStreamReceiver->getPreemptionAllocation()); + } + } +} diff --git a/shared/test/unit_test/helpers/engine_node_helper_tests.cpp b/shared/test/unit_test/helpers/engine_node_helper_tests.cpp index d13f0477e8..d1f85d8db0 100644 --- a/shared/test/unit_test/helpers/engine_node_helper_tests.cpp +++ b/shared/test/unit_test/helpers/engine_node_helper_tests.cpp @@ -70,6 +70,13 @@ TEST(EngineNodeHelperTest, givenCcsEngineWhenHelperIsUsedThenReturnTrue) { EXPECT_FALSE(EngineHelpers::isCcs(aub_stream::EngineType::NUM_ENGINES)); } +TEST(EngineNodeHelperTest, givenCcsWhenGettingCcsIndexThenReturnCorrectIndex) { + EXPECT_EQ(0u, EngineHelpers::getCcsIndex(aub_stream::ENGINE_CCS)); + EXPECT_EQ(1u, EngineHelpers::getCcsIndex(aub_stream::ENGINE_CCS1)); + EXPECT_EQ(2u, EngineHelpers::getCcsIndex(aub_stream::ENGINE_CCS2)); + EXPECT_EQ(3u, EngineHelpers::getCcsIndex(aub_stream::ENGINE_CCS3)); +} + TEST(EngineNodeHelperTest, givenInvalidEngineTypeWhenGettingStringRepresentationThenItIsCorrect) { EXPECT_EQ(std::string{"Unknown"}, EngineHelpers::engineTypeToString(aub_stream::EngineType::NUM_ENGINES)); EXPECT_EQ(std::string{"Unknown"}, EngineHelpers::engineTypeToString(static_cast(0xcc))); diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index b77274dcf2..f0ecb963d2 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -1588,3 +1588,25 @@ HWTEST2_F(GfxCoreHelperTest, whenGetDefaultDeviceHierarchyThenReturnFlatHierarch auto defaultDeviceHierarchy = gfxCoreHelper.getDefaultDeviceHierarchy(); EXPECT_STREQ("COMPOSITE", defaultDeviceHierarchy); } + +HWTEST_F(GfxCoreHelperTest, givenContextGroupDisabledWhenContextGroupContextsCountAndSecondaryContextsSupportQueriedThenZeroCountAndFalseIsReturned) { + auto &gfxCoreHelper = getHelper(); + EXPECT_EQ(0u, gfxCoreHelper.getContextGroupContextsCount()); + EXPECT_FALSE(gfxCoreHelper.areSecondaryContextsSupported()); +} + +TEST_F(GfxCoreHelperTest, givenContextGroupEnabledWithDebugKeyWhenContextGroupContextsCountAndSecondaryContextsSupportQueriedThenCorrectValuesAreReturned) { + DebugManagerStateRestore restorer; + debugManager.flags.ContextGroupSize.set(8); + auto &gfxCoreHelper = getHelper(); + EXPECT_EQ(8u, gfxCoreHelper.getContextGroupContextsCount()); + EXPECT_TRUE(gfxCoreHelper.areSecondaryContextsSupported()); + + debugManager.flags.ContextGroupSize.set(1); + EXPECT_EQ(1u, gfxCoreHelper.getContextGroupContextsCount()); + EXPECT_FALSE(gfxCoreHelper.areSecondaryContextsSupported()); + + debugManager.flags.ContextGroupSize.set(2); + EXPECT_EQ(2u, gfxCoreHelper.getContextGroupContextsCount()); + EXPECT_TRUE(gfxCoreHelper.areSecondaryContextsSupported()); +} \ No newline at end of file