// Patch summary (reviewer notes placed ahead of the first `diff --git` header).
//
// Adds Device::stopDirectSubmissionForCopyEngine() (declared in device.h, defined in device.cpp):
//   - returns without doing anything when getInternalCopyEngine() is nullptr,
//     when getHardwareInfo().featureTable.ftrBcsInfo.count() > 1, or when
//     tryGetEngine(<internal engine's type>, EngineUsage::regular) is nullptr;
//   - otherwise, if the regular engine's commandStreamReceiver reports
//     isAnyDirectSubmissionEnabled(), it takes obtainUniqueOwnership() on that receiver
//     and calls stopDirectSubmission(true) while the returned lock object is in scope.
//
// Call sites added by this patch: PageFaultManager::transferToCpu and
// PageFaultManager::transferToGpu, in both the level_zero and the opencl implementations,
// invoke the new method right after casting the incoming device / command-queue pointer
// and before the existing transfer logic.
//
// Test added: DeviceTests.givenCopyInternalEngineWhenStopDirectSubmissionForCopyEngineCalledThenStopDirectSubmission.
// With callBaseStopDirectSubmission = false it expects stopDirectSubmissionCalled to be
//   false when only the regular ENGINE_BCS engine has been created,
//   false after the internal ENGINE_BCS engine is created as well,
//   true  once blitterDirectSubmissionAvailable is set to true on the regular receiver.
//
// NOTE(review): casts and templates appear without their angle-bracket arguments
// (e.g. `static_cast(device)`, `VariableBackup backupHwInfo`, `reinterpret_cast *>(regularCsr)`).
// Presumably the `<...>` text was lost when this patch was extracted - confirm against the
// original commit before relying on this text.
// NOTE(review): the patch body below is collapsed onto three physical lines; presumably it has
// to be re-wrapped to one diff line per physical line before patch tooling can parse it - confirm.
// NOTE(review): the neo_device_tests.cpp hunk ends with "\ No newline at end of file".
diff --git a/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp b/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp index 245c8e4379..674e19d8d6 100644 --- a/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp +++ b/level_zero/core/source/memory/cpu_page_fault_memory_manager.cpp @@ -18,6 +18,7 @@ namespace NEO { void PageFaultManager::transferToCpu(void *ptr, size_t size, void *device) { L0::DeviceImp *deviceImp = static_cast(device); + deviceImp->getNEODevice()->stopDirectSubmissionForCopyEngine(); NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); UNRECOVERABLE_IF(allocData == nullptr); @@ -30,6 +31,7 @@ void PageFaultManager::transferToCpu(void *ptr, size_t size, void *device) { } void PageFaultManager::transferToGpu(void *ptr, void *device) { L0::DeviceImp *deviceImp = static_cast(device); + deviceImp->getNEODevice()->stopDirectSubmissionForCopyEngine(); NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr); UNRECOVERABLE_IF(allocData == nullptr); diff --git a/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp b/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp index 1a8170a959..e6a18b611f 100644 --- a/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp +++ b/opencl/source/memory_manager/cpu_page_fault_manager_memory_sync.cpp @@ -18,11 +18,15 @@ namespace NEO { void PageFaultManager::transferToCpu(void *ptr, size_t size, void *cmdQ) { auto commandQueue = static_cast(cmdQ); + commandQueue->getDevice().stopDirectSubmissionForCopyEngine(); + auto retVal = commandQueue->enqueueSVMMap(true, CL_MAP_WRITE, ptr, size, 0, nullptr, nullptr, false); UNRECOVERABLE_IF(retVal); } void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) { auto commandQueue = static_cast(cmdQ); + commandQueue->getDevice().stopDirectSubmissionForCopyEngine(); + 
memoryData[ptr].unifiedMemoryManager->insertSvmMapOperation(ptr, memoryData[ptr].size, ptr, 0, false); auto retVal = commandQueue->enqueueSVMUnmap(ptr, 0, nullptr, nullptr, false); UNRECOVERABLE_IF(retVal); diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 2dcc73aff2..1cdabfaa3b 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -1231,4 +1231,20 @@ EngineControl *SecondaryContexts::getEngine(EngineUsage usage) { return &engines[secondaryEngineIndex]; } +void Device::stopDirectSubmissionForCopyEngine() { + auto internalBcsEngine = getInternalCopyEngine(); + if (internalBcsEngine == nullptr || getHardwareInfo().featureTable.ftrBcsInfo.count() > 1) { + return; + } + auto regularBcsEngine = tryGetEngine(internalBcsEngine->osContext->getEngineType(), EngineUsage::regular); + if (regularBcsEngine == nullptr) { + return; + } + auto regularBcs = regularBcsEngine->commandStreamReceiver; + if (regularBcs->isAnyDirectSubmissionEnabled()) { + auto lock = regularBcs->obtainUniqueOwnership(); + regularBcs->stopDirectSubmission(true); + } +} + } // namespace NEO diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 8721f73891..5ff0e4fb85 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -216,6 +216,8 @@ class Device : public ReferenceTrackedObject { std::atomic debugExecutionCounter = 0; + void stopDirectSubmissionForCopyEngine(); + std::unique_lock obtainAllocationsReuseLock() const { return std::unique_lock(allocationsReuseMtx); } diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index f0e332db44..f6646cf0b9 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -25,6 +25,7 @@ #include "shared/test/common/helpers/raii_product_helper.h" #include "shared/test/common/helpers/ult_hw_config.h" #include 
"shared/test/common/helpers/variable_backup.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_allocation_properties.h" #include "shared/test/common/mocks/mock_builtins.h" #include "shared/test/common/mocks/mock_compiler_interface.h" @@ -1918,3 +1919,31 @@ TEST(DeviceWithoutAILTest, givenNoAILWhenCreateDeviceThenDeviceIsCreated) { EXPECT_NE(nullptr, device.get()); } + +HWTEST_F(DeviceTests, givenCopyInternalEngineWhenStopDirectSubmissionForCopyEngineCalledThenStopDirectSubmission) { + DebugManagerStateRestore dbgRestorer; + VariableBackup backupHwInfo(defaultHwInfo.get()); + VariableBackup backup(&ultHwConfig); + debugManager.flags.ForceBCSForInternalCopyEngine.set(0); + defaultHwInfo->capabilityTable.blitterOperationsSupported = true; + ultHwConfig.csrBaseCallBlitterDirectSubmissionAvailable = false; + + UltDeviceFactory factory{1, 0}; + factory.rootDevices[0]->createEngine({aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular}); + + auto device = factory.rootDevices[0]; + auto regularCsr = device->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular).commandStreamReceiver; + auto regularUltCsr = reinterpret_cast *>(regularCsr); + regularUltCsr->callBaseStopDirectSubmission = false; + + device->stopDirectSubmissionForCopyEngine(); + EXPECT_FALSE(regularUltCsr->stopDirectSubmissionCalled); + + factory.rootDevices[0]->createEngine({aub_stream::EngineType::ENGINE_BCS, EngineUsage::internal}); + device->stopDirectSubmissionForCopyEngine(); + EXPECT_FALSE(regularUltCsr->stopDirectSubmissionCalled); + + regularUltCsr->blitterDirectSubmissionAvailable = true; + device->stopDirectSubmissionForCopyEngine(); + EXPECT_TRUE(regularUltCsr->stopDirectSubmissionCalled); +} \ No newline at end of file