diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 06b6850c7f..321257da7e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1881,7 +1881,7 @@ ze_result_t CommandListCoreFamily::programSyncBuffer(Kernel &kern uint32_t maximalNumberOfWorkgroupsAllowed; auto ret = kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, this->engineGroupType, - device.getDefaultEngine().osContext->isEngineInstanced()); + device.isEngineInstanced()); UNRECOVERABLE_IF(ret != ZE_RESULT_SUCCESS); size_t requestedNumberOfWorkgroups = (pThreadGroupDimensions->groupCountX * pThreadGroupDimensions->groupCountY * pThreadGroupDimensions->groupCountZ); diff --git a/level_zero/core/source/device/device_imp_helper.cpp b/level_zero/core/source/device/device_imp_helper.cpp index 9f76cb1983..23aef4007e 100644 --- a/level_zero/core/source/device/device_imp_helper.cpp +++ b/level_zero/core/source/device/device_imp_helper.cpp @@ -13,7 +13,7 @@ namespace L0 { NEO::Device *DeviceImp::getActiveDevice() const { if (isMultiDeviceCapable()) { - return this->neoDevice->getThisOrNextNonRootCsrDevice(0); + return this->neoDevice->getNearestGenericSubDevice(0); } return this->neoDevice; } diff --git a/level_zero/core/test/unit_tests/gen9/test_cmdqueue_gen9.cpp b/level_zero/core/test/unit_tests/gen9/test_cmdqueue_gen9.cpp index c67fc827bb..299cfa6319 100644 --- a/level_zero/core/test/unit_tests/gen9/test_cmdqueue_gen9.cpp +++ b/level_zero/core/test/unit_tests/gen9/test_cmdqueue_gen9.cpp @@ -211,7 +211,7 @@ HWTEST2_F(CommandQueueGroupMultiDevice, L0::CommandQueueImp *cmdQueue = reinterpret_cast(commandList0->cmdQImmediate); L0::DeviceImp *deviceImp = reinterpret_cast(device); - auto expectedCSR = deviceImp->neoDevice->getThisOrNextNonRootCsrDevice(0)->getEngineGroups()[queueGroupOrdinal][queueGroupIndex].commandStreamReceiver; + auto expectedCSR = deviceImp->neoDevice->getNearestGenericSubDevice(0)->getEngineGroups()[queueGroupOrdinal][queueGroupIndex].commandStreamReceiver; EXPECT_EQ(cmdQueue->getCsr(), expectedCSR); } diff --git a/opencl/source/cl_device/cl_device.cpp b/opencl/source/cl_device/cl_device.cpp index 11a02eef9b..b1a33999aa 100644 --- a/opencl/source/cl_device/cl_device.cpp +++ b/opencl/source/cl_device/cl_device.cpp @@ -23,7 +23,7 @@ namespace NEO { -ClDevice::ClDevice(Device &device, Platform *platform) : device(device), platformId(platform) { +ClDevice::ClDevice(Device &device, ClDevice &rootClDevice, Platform *platform) : device(device), rootClDevice(rootClDevice), platformId(platform) { device.incRefInternal(); device.setSpecializedDevice(this); deviceExtensions.reserve(1000); @@ -39,7 +39,7 @@ ClDevice::ClDevice(Device &device, Platform *platform) : device(device), platfor if (numAvailableDevices > 1) { for (uint32_t i = 0; i < numAvailableDevices; i++) { auto &coreSubDevice = static_cast(*device.getSubDevice(i)); - auto pClSubDevice = std::make_unique(coreSubDevice, platform); + auto pClSubDevice = std::make_unique(coreSubDevice, rootClDevice, platform); pClSubDevice->incRefInternal(); pClSubDevice->decRefApi(); @@ -61,6 +61,9 @@ ClDevice::ClDevice(Device &device, Platform *platform) : device(device), platfor } } +ClDevice::ClDevice(Device &device, Platform *platformId) : ClDevice(device, *this, platformId) { +} + ClDevice::~ClDevice() { if (getSharedDeviceInfo().debuggerActive && getSourceLevelDebugger()) { @@ -123,7 +126,17 @@ ClDevice *ClDevice::getSubDevice(uint32_t deviceId) const { return subDevices[deviceId].get(); } -ClDevice *ClDevice::getThisOrNextNonRootCsrDevice(uint32_t deviceId) { +ClDevice *ClDevice::getNearestGenericSubDevice(uint32_t deviceId) { + /* + * EngineInstanced: Upper level + * Generic SubDevice: 'this' + * RootCsr Device: Next level SubDevice (generic) + */ + + if (getDevice().isEngineInstanced()) { + return rootClDevice.getNearestGenericSubDevice(Math::log2(static_cast(getDeviceBitfield().to_ulong()))); + } + if (subDevices.empty() || !getDevice().hasRootCsr()) { return const_cast(this); } diff --git a/opencl/source/cl_device/cl_device.h b/opencl/source/cl_device/cl_device.h index 24693fe1dd..c06953301f 100644 --- a/opencl/source/cl_device/cl_device.h +++ b/opencl/source/cl_device/cl_device.h @@ -51,6 +51,7 @@ class ClDevice : public BaseObject<_cl_device_id> { ClDevice(const ClDevice &) = delete; explicit ClDevice(Device &device, Platform *platformId); + explicit ClDevice(Device &device, ClDevice &rootClDevice, Platform *platformId); ~ClDevice() override; void incRefInternal(); @@ -116,7 +117,7 @@ class ClDevice : public BaseObject<_cl_device_id> { const ClDeviceInfo &getDeviceInfo() const { return deviceInfo; } const DeviceInfo &getSharedDeviceInfo() const; ClDevice *getSubDevice(uint32_t deviceId) const; - ClDevice *getThisOrNextNonRootCsrDevice(uint32_t deviceId); + ClDevice *getNearestGenericSubDevice(uint32_t deviceId); const std::string &peekCompilerExtensions() const; const std::string &peekCompilerExtensionsWithFeatures() const; DeviceBitfield getDeviceBitfield() const; @@ -138,6 +139,7 @@ class ClDevice : public BaseObject<_cl_device_id> { const std::string getClDeviceName(const HardwareInfo &hwInfo) const; Device &device; + ClDevice &rootClDevice; std::vector> subDevices; cl_platform_id platformId; diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index bff1a8d698..d4bd3fbc1b 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -83,7 +83,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr deferredTimestampPackets = std::make_unique(); } if (bcsAllowed) { - auto &neoDevice = device->getThisOrNextNonRootCsrDevice(0)->getDevice(); + auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice(); auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine(); auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage); bcsEngine = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular); @@ -113,7 +113,7 @@ CommandQueue::~CommandQueue() { } if (bcsEngine) { - auto &selectorCopyEngine = device->getThisOrNextNonRootCsrDevice(0)->getSelectorCopyEngine(); + auto &selectorCopyEngine = device->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); EngineHelpers::releaseBcsEngineType(bcsEngine->getEngineType(), selectorCopyEngine); } } diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 7ac442a11e..edb76c3cd5 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -42,7 +42,7 @@ class CommandQueueHw : public CommandQueue { if (clPriority & static_cast(CL_QUEUE_PRIORITY_LOW_KHR)) { priority = QueuePriority::LOW; - this->gpgpuEngine = &device->getThisOrNextNonRootCsrDevice(0)->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority); + this->gpgpuEngine = &device->getNearestGenericSubDevice(0)->getEngine(getChosenEngineType(device->getHardwareInfo()), EngineUsage::LowPriority); } else if (clPriority & static_cast(CL_QUEUE_PRIORITY_MED_KHR)) { priority = QueuePriority::MEDIUM; } else if (clPriority & static_cast(CL_QUEUE_PRIORITY_HIGH_KHR)) { diff --git a/opencl/source/program/program.cpp b/opencl/source/program/program.cpp index fec838b80f..a0068254c4 100644 --- a/opencl/source/program/program.cpp +++ b/opencl/source/program/program.cpp @@ -55,7 +55,7 @@ Program::Program(Context *context, bool isBuiltIn, const ClDeviceVector &clDevic deviceBuildInfos[device] = {}; if (device->getNumAvailableDevices() > 1) { for (auto i = 0u; i < device->getNumAvailableDevices(); i++) { - auto subDevice = device->getThisOrNextNonRootCsrDevice(i); + auto subDevice = device->getNearestGenericSubDevice(i); if (isDeviceAssociated(*subDevice)) { deviceBuildInfos[device].associatedSubDevices.push_back(subDevice); } diff --git a/opencl/test/unit_test/device/sub_device_tests.cpp b/opencl/test/unit_test/device/sub_device_tests.cpp index 2303c428ce..49ce2742cb 100644 --- a/opencl/test/unit_test/device/sub_device_tests.cpp +++ b/opencl/test/unit_test/device/sub_device_tests.cpp @@ -425,7 +425,7 @@ TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetAndZeroCcsesWhenCreatingRoot EXPECT_TRUE(hasAllEngines(rootDevice)); EXPECT_EQ(1u, rootDevice->getNumAvailableDevices()); - EXPECT_FALSE(rootDevice->getThisOrNextNonRootCsrDevice(0)->isSubDevice()); + EXPECT_FALSE(rootDevice->getNearestGenericSubDevice(0)->isSubDevice()); } TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetAndSingleCcsWhenCreatingRootDeviceWithoutGenericSubDevicesThenDontCreateEngineInstanced) { @@ -440,7 +440,7 @@ TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetAndSingleCcsWhenCreatingRoot EXPECT_TRUE(hasAllEngines(rootDevice)); EXPECT_EQ(1u, rootDevice->getNumAvailableDevices()); - EXPECT_FALSE(rootDevice->getThisOrNextNonRootCsrDevice(0)->isSubDevice()); + EXPECT_FALSE(rootDevice->getNearestGenericSubDevice(0)->isSubDevice()); } TEST_F(EngineInstancedDeviceTests, givenDebugFlagSetWhenCreatingRootDeviceWithGenericSubDevicesAndZeroCcsesThenDontCreateEngineInstanced) { @@ -556,16 +556,16 @@ TEST_F(EngineInstancedDeviceTests, givenMultipleSubDevicesWhenCallingGetSubDevic { EXPECT_EQ(rootDevice->getSubDevice(0), subDevice0); - EXPECT_EQ(rootDevice->getThisOrNextNonRootCsrDevice(0), subDevice0); + EXPECT_EQ(rootDevice->getNearestGenericSubDevice(0), subDevice0); EXPECT_EQ(rootDevice->getSubDevice(1), subDevice1); - EXPECT_EQ(rootDevice->getThisOrNextNonRootCsrDevice(1), subDevice1); + EXPECT_EQ(rootDevice->getNearestGenericSubDevice(1), subDevice1); } { - EXPECT_EQ(subDevice0->getThisOrNextNonRootCsrDevice(0), subDevice0); - EXPECT_EQ(subDevice0->getThisOrNextNonRootCsrDevice(1), subDevice0); - EXPECT_EQ(subDevice1->getThisOrNextNonRootCsrDevice(0), subDevice1); - EXPECT_EQ(subDevice1->getThisOrNextNonRootCsrDevice(1), subDevice1); + EXPECT_EQ(subDevice0->getNearestGenericSubDevice(0), subDevice0); + EXPECT_EQ(subDevice0->getNearestGenericSubDevice(1), subDevice0); + EXPECT_EQ(subDevice1->getNearestGenericSubDevice(0), subDevice1); + EXPECT_EQ(subDevice1->getNearestGenericSubDevice(1), subDevice1); } { @@ -576,10 +576,10 @@ TEST_F(EngineInstancedDeviceTests, givenMultipleSubDevicesWhenCallingGetSubDevic } { - EXPECT_EQ(subSubDevice00->getThisOrNextNonRootCsrDevice(0), subSubDevice00); - EXPECT_EQ(subSubDevice01->getThisOrNextNonRootCsrDevice(0), subSubDevice01); - EXPECT_EQ(subSubDevice10->getThisOrNextNonRootCsrDevice(0), subSubDevice10); - EXPECT_EQ(subSubDevice11->getThisOrNextNonRootCsrDevice(0), subSubDevice11); + EXPECT_EQ(subSubDevice00->getNearestGenericSubDevice(0), subDevice0); + EXPECT_EQ(subSubDevice01->getNearestGenericSubDevice(0), subDevice0); + EXPECT_EQ(subSubDevice10->getNearestGenericSubDevice(0), subDevice1); + EXPECT_EQ(subSubDevice11->getNearestGenericSubDevice(0), subDevice1); } { @@ -602,12 +602,12 @@ TEST_F(EngineInstancedDeviceTests, givenMultipleClSubDevicesWhenCallingGetSubDev auto subSubDevice = subDevice->getSubDevice(0); auto clRootDevice = std::make_unique(*rootDevice, nullptr); - auto clSubDevice = std::make_unique(*subDevice, nullptr); - auto clSubSubDevice = std::make_unique(*subSubDevice, nullptr); + auto clSubDevice = std::make_unique(*subDevice, *clRootDevice, nullptr); + auto clSubSubDevice = std::make_unique(*subSubDevice, *clRootDevice, nullptr); - EXPECT_EQ(clRootDevice->getSubDevice(0), clRootDevice->getThisOrNextNonRootCsrDevice(0)); - EXPECT_EQ(clSubDevice.get(), clSubDevice->getThisOrNextNonRootCsrDevice(0)); - EXPECT_EQ(clSubSubDevice.get(), clSubSubDevice->getThisOrNextNonRootCsrDevice(0)); + EXPECT_EQ(clRootDevice->getSubDevice(0), clRootDevice->getNearestGenericSubDevice(0)); + EXPECT_EQ(clSubDevice.get(), clSubDevice->getNearestGenericSubDevice(0)); + EXPECT_EQ(clRootDevice->getSubDevice(0), clSubSubDevice->getNearestGenericSubDevice(0)); } TEST_F(EngineInstancedDeviceTests, givenAffinityMaskSetWhenCreatingDevicesThenFilterMaskedDevices) { diff --git a/opencl/test/unit_test/helpers/engine_node_helper_tests_xehp_and_later.cpp b/opencl/test/unit_test/helpers/engine_node_helper_tests_xehp_and_later.cpp index 60b38d962e..48a0a476c6 100644 --- a/opencl/test/unit_test/helpers/engine_node_helper_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/engine_node_helper_tests_xehp_and_later.cpp @@ -16,6 +16,6 @@ using EngineNodeHelperTestsXeHPAndLater = ::Test; HWCMDTEST_F(IGFX_XE_HP_CORE, EngineNodeHelperTestsXeHPAndLater, WhenGetBcsEngineTypeIsCalledThenBcsEngineIsReturned) { const auto hwInfo = pDevice->getHardwareInfo(); - auto &selectorCopyEngine = pDevice->getThisOrNextNonRootCsrDevice(0)->getSelectorCopyEngine(); + auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine(); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, EngineHelpers::getBcsEngineType(hwInfo, {}, selectorCopyEngine, false)); } diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index ec81051594..11fc2782d3 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -34,7 +34,7 @@ struct BcsBufferTests : public ::testing::Test { class BcsMockContext : public MockContext { public: BcsMockContext(ClDevice *device) : MockContext(device) { - bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_CCS, EngineUsage::Regular}, device->getDeviceBitfield()))); + bcsOsContext.reset(OsContext::create(nullptr, 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, device->getDeviceBitfield()))); bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); bcsCsr->setupContext(*bcsOsContext); bcsCsr->initializeTagAllocation(); diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index bb0e81808e..83c2e941e4 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -491,7 +491,17 @@ Device *Device::getSubDevice(uint32_t deviceId) const { return subdevices[deviceId]; } -Device *Device::getThisOrNextNonRootCsrDevice(uint32_t deviceId) { +Device *Device::getNearestGenericSubDevice(uint32_t deviceId) { + /* + * EngineInstanced: Upper level + * Generic SubDevice: 'this' + * RootCsr Device: Next level SubDevice (generic) + */ + + if (engineInstanced) { + return getRootDevice()->getNearestGenericSubDevice(Math::log2(static_cast(deviceBitfield.to_ulong()))); + } + if (subdevices.empty() || !hasRootCsr()) { return const_cast(this); } @@ -553,7 +563,7 @@ EngineControl &Device::getInternalEngine() { auto engineType = getChosenEngineType(getHardwareInfo()); - return this->getThisOrNextNonRootCsrDevice(0)->getEngine(engineType, EngineUsage::Internal); + return this->getNearestGenericSubDevice(0)->getEngine(engineType, EngineUsage::Internal); } void Device::initializeRayTracing() { diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 7b4b8bb806..36e3edc339 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -110,12 +110,13 @@ class Device : public ReferenceTrackedObject { virtual uint32_t getRootDeviceIndex() const = 0; uint32_t getNumAvailableDevices() const; Device *getSubDevice(uint32_t deviceId) const; - Device *getThisOrNextNonRootCsrDevice(uint32_t deviceId); + Device *getNearestGenericSubDevice(uint32_t deviceId); virtual Device *getRootDevice() const = 0; DeviceBitfield getDeviceBitfield() const { return deviceBitfield; }; uint32_t getNumSubDevices() const { return numSubDevices; } virtual bool isSubDevice() const = 0; bool hasRootCsr() const { return rootCsrCreated; } + bool isEngineInstanced() const { return engineInstanced; } BindlessHeapsHelper *getBindlessHeapsHelper() const; diff --git a/shared/source/helpers/blit_commands_helper.cpp b/shared/source/helpers/blit_commands_helper.cpp index 654326942a..d347eb88d2 100644 --- a/shared/source/helpers/blit_commands_helper.cpp +++ b/shared/source/helpers/blit_commands_helper.cpp @@ -189,7 +189,7 @@ BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device } UNRECOVERABLE_IF(!pRootDevice->getDeviceBitfield().test(tileId)); - auto pDeviceForBlit = pRootDevice->getThisOrNextNonRootCsrDevice(tileId); + auto pDeviceForBlit = pRootDevice->getNearestGenericSubDevice(tileId); auto &selectorCopyEngine = pDeviceForBlit->getSelectorCopyEngine(); auto deviceBitfield = pDeviceForBlit->getDeviceBitfield();