diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index afe0e6a835..d45f0b104a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1055,7 +1055,7 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemoryExt(voi return ZE_RESULT_ERROR_UNKNOWN; } - if (this->device->getNEODevice()->isAnyDirectSubmissionEnabled()) { + if (this->device->getNEODevice()->isAnyDirectSubmissionEnabled(false)) { NEO::PipeControlArgs pipeControlArgs; pipeControlArgs.textureCacheInvalidationEnable = true; NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), pipeControlArgs); @@ -4016,7 +4016,7 @@ void CommandListCoreFamily::dispatchPostSyncCommands(const CmdLis if (isImmediateType()) { pipeControlArgs.constantCacheInvalidationEnable = getCsr(false)->isDirectSubmissionEnabled(); } else { - pipeControlArgs.constantCacheInvalidationEnable = this->device->getNEODevice()->isAnyDirectSubmissionEnabled(); + pipeControlArgs.constantCacheInvalidationEnable = this->device->getNEODevice()->isAnyDirectSubmissionEnabled(false); } } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index a772eb2eef..b6ec1bd1e8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -626,7 +626,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, device->getNEODevice()->getRootDeviceEnvironment()), cmd->getDcFlushEnable()); - EXPECT_EQ(device->getNEODevice()->isAnyDirectSubmissionEnabled(), cmd->getConstantCacheInvalidationEnable()); + EXPECT_EQ(device->getNEODevice()->isAnyDirectSubmissionEnabled(false), cmd->getConstantCacheInvalidationEnable()); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index d7f3de1b27..339c4ee3a5 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -357,7 +357,7 @@ class CommandStreamReceiver { MOCKABLE_VIRTUAL void startControllingDirectSubmissions(); - bool isAnyDirectSubmissionEnabled() { + bool isAnyDirectSubmissionEnabled() const { return this->isDirectSubmissionEnabled() || isBlitterDirectSubmissionEnabled(); } diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 70aa42e36c..10c390ccf9 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -1064,13 +1064,14 @@ void Device::stopDirectSubmissionAndWaitForCompletion() { } } -bool Device::isAnyDirectSubmissionEnabled() { - bool enabled = false; - for (auto &engine : allEngines) { - auto csr = engine.commandStreamReceiver; - enabled |= csr->isAnyDirectSubmissionEnabled(); +bool Device::isAnyDirectSubmissionEnabled(bool light) const { + for (const auto &engine : allEngines) { + auto enabled = light ? engine.osContext->isDirectSubmissionLightActive() : engine.commandStreamReceiver->isAnyDirectSubmissionEnabled(); + if (enabled) { + return true; + } } - return enabled; + return false; } void Device::allocateRTDispatchGlobals(uint32_t maxBvhLevels) { diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 0b58e550f2..26c801389e 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -202,7 +202,7 @@ class Device : public ReferenceTrackedObject { return deviceUsmMemAllocPoolsManager.get(); } MOCKABLE_VIRTUAL void stopDirectSubmissionAndWaitForCompletion(); - bool isAnyDirectSubmissionEnabled(); + MOCKABLE_VIRTUAL bool isAnyDirectSubmissionEnabled(bool light) const; bool isStateSipRequired() const { return (getPreemptionMode() == PreemptionMode::MidThread || getDebugger() != nullptr) && getCompilerInterface(); } diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index 985a9c5e2b..3045bde4c2 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -845,7 +845,9 @@ void SVMAllocsManager::initUsmAllocationsCaches(Device &device) { this->usmDeviceAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableDeviceAllocationCache.get(); } if (this->usmDeviceAllocationsCacheEnabled) { - device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner(); + if (!device.isAnyDirectSubmissionEnabled(true)) { + device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner(); + } this->initUsmDeviceAllocationsCache(device); } @@ -854,7 +856,9 @@ void SVMAllocsManager::initUsmAllocationsCaches(Device &device) { this->usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get(); } if (this->usmHostAllocationsCacheEnabled) { - device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner(); + if (!device.isAnyDirectSubmissionEnabled(true)) { + device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner(); + } this->initUsmHostAllocationsCache(); } } diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index 54055b367b..34fdb73d20 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -170,6 +170,10 @@ class MockDevice : public RootDevice { rtDispatchGlobalsForceAllocation = true; } + bool isAnyDirectSubmissionEnabled(bool light) const override { + return anyDirectSubmissionEnabledReturnValue; + } + void stopDirectSubmissionAndWaitForCompletion() override { stopDirectSubmissionCalled = true; Device::stopDirectSubmissionAndWaitForCompletion(); @@ -187,6 +191,7 @@ class MockDevice : public RootDevice { static ExecutionEnvironment *prepareExecutionEnvironment(const HardwareInfo *pHwInfo); static decltype(&createCommandStream) createCommandStreamReceiverFunc; + bool anyDirectSubmissionEnabledReturnValue = false; bool callBaseGetMaxParameterSizeFromIGC = false; bool callBaseVerifyAdapterLuid = true; bool verifyAdapterLuidReturnValue = true; diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp index c86efab166..1ef53a8424 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -202,6 +202,23 @@ HWTEST_F(SvmDeviceAllocationCacheTest, givenOclApiSpecificConfigWhenCheckingIfEn } } +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationCacheEnabledWhenDirectSubmissionLightActiveThenCleanerDisabled) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + debugManager.flags.ExperimentalEnableHostAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + device->anyDirectSubmissionEnabledReturnValue = true; + + svmManager->initUsmAllocationsCaches(*device); + + ASSERT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + EXPECT_FALSE(device->getExecutionEnvironment()->unifiedMemoryReuseCleaner.get()); +} + struct SvmDeviceAllocationCacheSimpleTestDataType { size_t allocationSize; void *allocation;