From e652679b9503bc91eb92998c39413107e4a0360d Mon Sep 17 00:00:00 2001 From: "Grochowski, Stanislaw" Date: Thu, 10 Jul 2025 13:41:47 +0000 Subject: [PATCH] fix: correct pollForCompletion in aub on program termination Related-To: NEO-14867 Signed-off-by: Grochowski, Stanislaw --- level_zero/core/source/device/device_imp.cpp | 1 + opencl/source/platform/platform.cpp | 4 +- shared/source/device/device.cpp | 10 +++ shared/source/device/device.h | 2 +- .../libult/ult_command_stream_receiver.h | 4 + shared/test/common/mocks/mock_device.h | 12 +++ .../unit_test/device/neo_device_tests.cpp | 81 ++++++++++++++++++- 7 files changed, 108 insertions(+), 6 deletions(-) diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index eaa889a12d..e72a18104e 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1770,6 +1770,7 @@ void DeviceImp::releaseResources() { allocationsForReuse->freeAllGraphicsAllocations(neoDevice); allocationsForReuse.reset(); } + neoDevice->pollForCompletion(); neoDevice->decRefInternal(); neoDevice = nullptr; diff --git a/opencl/source/platform/platform.cpp b/opencl/source/platform/platform.cpp index 7f68547e15..536e9b2ef6 100644 --- a/opencl/source/platform/platform.cpp +++ b/opencl/source/platform/platform.cpp @@ -46,9 +46,7 @@ Platform::~Platform() { for (auto clDevice : this->clDevices) { clDevice->getDevice().getRootDeviceEnvironmentRef().debugger.reset(nullptr); clDevice->getDevice().stopDirectSubmissionAndWaitForCompletion(); - if (clDevice->getDevice().getDefaultEngine().commandStreamReceiver->isAubMode()) { - clDevice->getDevice().pollForCompletion(); - } + clDevice->getDevice().pollForCompletion(); clDevice->decRefInternal(); } diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 841747636b..167db0b7d1 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -1085,10 +1085,20 @@ void Device::stopDirectSubmissionAndWaitForCompletion() { } void Device::pollForCompletion() { + if (allEngines.size() == 0 || !getDefaultEngine().commandStreamReceiver->isAubMode()) { + return; + } + for (auto &engine : allEngines) { auto csr = engine.commandStreamReceiver; csr->pollForCompletion(); } + + for (auto &subDevice : subdevices) { + if (subDevice != nullptr) { + subDevice->pollForCompletion(); + } + } } bool Device::isAnyDirectSubmissionEnabled() const { diff --git a/shared/source/device/device.h b/shared/source/device/device.h index aaff376131..38832b9c82 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -212,7 +212,7 @@ class Device : public ReferenceTrackedObject, NEO::NonCopyableAndNonMova return usmMemAllocPool.get(); } MOCKABLE_VIRTUAL void stopDirectSubmissionAndWaitForCompletion(); - void pollForCompletion(); + MOCKABLE_VIRTUAL void pollForCompletion(); bool isAnyDirectSubmissionEnabled() const; bool isAnyDirectSubmissionLightEnabled() const; bool isStateSipRequired() const { diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 1a4d592370..2d43bda68e 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -500,6 +500,10 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw { return commandStreamReceiverType; } + void setType(CommandStreamReceiverType commandStreamReceiverType) { + this->commandStreamReceiverType = commandStreamReceiverType; + } + void pollForCompletion(bool skipTaskCountCheck) override { pollForCompletionCalled++; } diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index 3732a72967..835fa4d0d9 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -44,7 +44,13 @@ struct MockSubDevice : public SubDevice { static decltype(&createCommandStream) createCommandStreamReceiverFunc; bool failOnCreateEngine = false; + bool pollForCompletionCalled = false; + bool createEngine(EngineTypeUsage engineTypeUsage) override; + void pollForCompletion() override { + pollForCompletionCalled = true; + Device::pollForCompletion(); + } }; class MockDevice : public RootDevice { @@ -173,6 +179,11 @@ class MockDevice : public RootDevice { Device::stopDirectSubmissionAndWaitForCompletion(); } + void pollForCompletion() override { + pollForCompletionCalled = true; + Device::pollForCompletion(); + } + uint64_t getGlobalMemorySize(uint32_t deviceBitfield) const override { if (callBaseGetGlobalMemorySize) { return Device::getGlobalMemorySize(deviceBitfield); @@ -192,6 +203,7 @@ class MockDevice : public RootDevice { size_t maxParameterSizeFromIGC = 0u; bool rtDispatchGlobalsForceAllocation = true; bool stopDirectSubmissionCalled = false; + bool pollForCompletionCalled = false; ReleaseHelper *mockReleaseHelper = nullptr; AILConfiguration *mockAilConfigurationHelper = nullptr; uint64_t getGlobalMemorySizeReturn = 0u; diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index 6057f65c3e..1e3ce086e5 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -2706,7 +2706,7 @@ TEST(GroupDevicesTest, givenNullInputInDeviceVectorWhenGroupDevicesThenEmptyVect EXPECT_TRUE(groupedDevices.empty()); } -HWTEST_F(DeviceTests, givenDeviceWhenPollForCompletionCalledThenPollForCompletionCalledOnAllCommandStreamReceivers) { +HWTEST_F(DeviceTests, givenVariousCsrModeWhenDevicePollForCompletionIsCalledThenPollForCompletionIsCalledCorrectlyOnCommandStreamReceivers) { auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); std::vector csrCallCounts; csrCallCounts.reserve(device->commandStreamReceivers.size()); @@ -2716,10 +2716,87 @@ HWTEST_F(DeviceTests, givenDeviceWhenPollForCompletionCalledThenPollForCompletio csrCallCounts.push_back(csr.pollForCompletionCalled); } + device->getUltCommandStreamReceiver().setType(CommandStreamReceiverType::hardwareWithAub); device->pollForCompletion(); for (uint32_t csrIndex = 0; csrIndex < device->commandStreamReceivers.size(); csrIndex++) { auto &csr = device->getUltCommandStreamReceiverFromIndex(csrIndex); - EXPECT_EQ(csrCallCounts[csrIndex] + 1, csr.pollForCompletionCalled); + EXPECT_EQ(++csrCallCounts[csrIndex], csr.pollForCompletionCalled); + } + + device->getUltCommandStreamReceiver().setType(CommandStreamReceiverType::hardware); + device->pollForCompletion(); + + for (uint32_t csrIndex = 0; csrIndex < device->commandStreamReceivers.size(); csrIndex++) { + auto &csr = device->getUltCommandStreamReceiverFromIndex(csrIndex); + EXPECT_EQ(csrCallCounts[csrIndex], csr.pollForCompletionCalled); } } + +HWTEST_F(DeviceTests, givenDeviceWithNoEnginesWhenPollForCompletionIsCalledThenEarlyReturnAndDontCrash) { + auto device = std::unique_ptr(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + + auto &allEngines = const_cast &>(device->getAllEngines()); + allEngines.clear(); + + EXPECT_EQ(device->allEngines.size(), 0u); + EXPECT_NO_THROW(device->pollForCompletion()); +} + +HWTEST_F(DeviceTests, givenRootDeviceWhenPollForCompletionIsCalledThenPollForCompletionIsCalledOnAllSubDevices) { + UltDeviceFactory factory{1, 2}; + auto rootDevice = factory.rootDevices[0]; + for (auto subDevice : factory.subDevices) { + auto *mockSubDevice = static_cast(subDevice); + EXPECT_FALSE(mockSubDevice->pollForCompletionCalled); + } + + rootDevice->getUltCommandStreamReceiver().setType(CommandStreamReceiverType::hardwareWithAub); + rootDevice->pollForCompletion(); + + for (auto subDevice : factory.subDevices) { + auto *mockSubDevice = static_cast(subDevice); + EXPECT_TRUE(mockSubDevice->pollForCompletionCalled); + } +} + +HWTEST_F(DeviceTests, givenMaskedSubDevicesWhenCallingPollForCompletionOnRootDeviceThenPollForCompletionIsCalledOnlyOnMaskedDevices) { + constexpr uint32_t numSubDevices = 3; + constexpr uint32_t numMaskedSubDevices = 2; + + DebugManagerStateRestore restorer; + debugManager.flags.CreateMultipleSubDevices.set(numSubDevices); + debugManager.flags.ZE_AFFINITY_MASK.set("0.0,0.2"); + + auto executionEnvironment = std::make_unique(); + executionEnvironment->prepareRootDeviceEnvironments(1); + + executionEnvironment->rootDeviceEnvironments[0]->setHwInfoAndInitHelpers(defaultHwInfo.get()); + executionEnvironment->rootDeviceEnvironments[0]->initGmm(); + executionEnvironment->parseAffinityMask(); + UltDeviceFactory deviceFactory{1, numSubDevices, *executionEnvironment.release()}; + auto rootDevice = deviceFactory.rootDevices[0]; + EXPECT_NE(nullptr, rootDevice); + EXPECT_EQ(numMaskedSubDevices, rootDevice->getNumSubDevices()); + + for (auto subDevice : rootDevice->getSubDevices()) { + if (subDevice != nullptr) { + auto *mockSubDevice = static_cast(subDevice); + EXPECT_FALSE(mockSubDevice->pollForCompletionCalled); + } + } + + rootDevice->getUltCommandStreamReceiver().setType(CommandStreamReceiverType::hardwareWithAub); + rootDevice->pollForCompletion(); + + unsigned int callCount = 0; + for (auto subDevice : rootDevice->getSubDevices()) { + if (subDevice != nullptr) { + auto *mockSubDevice = static_cast(subDevice); + if (mockSubDevice->pollForCompletionCalled) { + callCount++; + } + } + } + EXPECT_EQ(callCount, numMaskedSubDevices); +} \ No newline at end of file