performance: stop ULLS for BCS during migration

Related-To: NEO-13340

When the regular copy CSR has direct submission enabled,
stop it before performing the migration on the internal CSR.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek 2024-11-27 14:52:14 +00:00 committed by Compute-Runtime-Automation
parent 9b952c08c7
commit 81ba52aac4
5 changed files with 53 additions and 0 deletions

View File

@ -18,6 +18,7 @@
namespace NEO {
void PageFaultManager::transferToCpu(void *ptr, size_t size, void *device) {
L0::DeviceImp *deviceImp = static_cast<L0::DeviceImp *>(device);
deviceImp->getNEODevice()->stopDirectSubmissionForCopyEngine();
NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
UNRECOVERABLE_IF(allocData == nullptr);
@ -30,6 +31,7 @@ void PageFaultManager::transferToCpu(void *ptr, size_t size, void *device) {
}
void PageFaultManager::transferToGpu(void *ptr, void *device) {
L0::DeviceImp *deviceImp = static_cast<L0::DeviceImp *>(device);
deviceImp->getNEODevice()->stopDirectSubmissionForCopyEngine();
NEO::SvmAllocationData *allocData = deviceImp->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
UNRECOVERABLE_IF(allocData == nullptr);

View File

@ -18,11 +18,15 @@
namespace NEO {
// Moves the allocation at `ptr` to the CPU side through the given OpenCL command queue.
// `cmdQ` is an opaque pointer that is treated as a CommandQueue.
void PageFaultManager::transferToCpu(void *ptr, size_t size, void *cmdQ) {
    auto *queue = static_cast<CommandQueue *>(cmdQ);

    // Direct submission on the copy engine is stopped before the transfer is enqueued.
    auto &&queueDevice = queue->getDevice();
    queueDevice.stopDirectSubmissionForCopyEngine();

    // Map for write access (first argument `true`, presumably the blocking-map flag — confirm);
    // any non-zero status is treated as unrecoverable.
    const auto mapStatus = queue->enqueueSVMMap(true, CL_MAP_WRITE, ptr, size, 0, nullptr, nullptr, false);
    UNRECOVERABLE_IF(mapStatus);
}
void PageFaultManager::transferToGpu(void *ptr, void *cmdQ) {
auto commandQueue = static_cast<CommandQueue *>(cmdQ);
commandQueue->getDevice().stopDirectSubmissionForCopyEngine();
memoryData[ptr].unifiedMemoryManager->insertSvmMapOperation(ptr, memoryData[ptr].size, ptr, 0, false);
auto retVal = commandQueue->enqueueSVMUnmap(ptr, 0, nullptr, nullptr, false);
UNRECOVERABLE_IF(retVal);

View File

@ -1231,4 +1231,20 @@ EngineControl *SecondaryContexts::getEngine(EngineUsage usage) {
return &engines[secondaryEngineIndex];
}
void Device::stopDirectSubmissionForCopyEngine() {
auto internalBcsEngine = getInternalCopyEngine();
if (internalBcsEngine == nullptr || getHardwareInfo().featureTable.ftrBcsInfo.count() > 1) {
return;
}
auto regularBcsEngine = tryGetEngine(internalBcsEngine->osContext->getEngineType(), EngineUsage::regular);
if (regularBcsEngine == nullptr) {
return;
}
auto regularBcs = regularBcsEngine->commandStreamReceiver;
if (regularBcs->isAnyDirectSubmissionEnabled()) {
auto lock = regularBcs->obtainUniqueOwnership();
regularBcs->stopDirectSubmission(true);
}
}
} // namespace NEO

View File

@ -216,6 +216,8 @@ class Device : public ReferenceTrackedObject<Device> {
std::atomic<uint32_t> debugExecutionCounter = 0;
// Stops direct submission on the regular copy (BCS) engine's command stream receiver
// if it is enabled there. No-op when the device has no internal copy engine, reports
// more than one BCS engine, or has no regular engine of the internal engine's type.
void stopDirectSubmissionForCopyEngine();
// Locks allocationsReuseMtx and hands the owning lock to the caller;
// the mutex stays locked for as long as the returned object holds it.
std::unique_lock<std::mutex> obtainAllocationsReuseLock() const {
    std::unique_lock<std::mutex> reuseLock(allocationsReuseMtx);
    return reuseLock;
}

View File

@ -25,6 +25,7 @@
#include "shared/test/common/helpers/raii_product_helper.h"
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_allocation_properties.h"
#include "shared/test/common/mocks/mock_builtins.h"
#include "shared/test/common/mocks/mock_compiler_interface.h"
@ -1918,3 +1919,31 @@ TEST(DeviceWithoutAILTest, givenNoAILWhenCreateDeviceThenDeviceIsCreated) {
EXPECT_NE(nullptr, device.get());
}
// Verifies Device::stopDirectSubmissionForCopyEngine() in three phases:
//  1. only a regular BCS engine has been created explicitly -> stop is not called,
//  2. an internal BCS engine is added, blitter direct submission not available -> stop is not called,
//  3. blitter direct submission is reported as available    -> stop is called.
HWTEST_F(DeviceTests, givenCopyInternalEngineWhenStopDirectSubmissionForCopyEngineCalledThenStopDirectSubmission) {
    DebugManagerStateRestore dbgRestorer;
    VariableBackup<HardwareInfo> backupHwInfo(defaultHwInfo.get());
    VariableBackup<UltHwConfig> backup(&ultHwConfig);
    debugManager.flags.ForceBCSForInternalCopyEngine.set(0);
    defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
    // Let the ULT CSR answer the blitter-direct-submission query from its own flag
    // instead of the base implementation (presumably; see UltCommandStreamReceiver).
    ultHwConfig.csrBaseCallBlitterDirectSubmissionAvailable = false;

    UltDeviceFactory factory{1, 0};
    auto device = factory.rootDevices[0];
    device->createEngine({aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular});

    auto regularCsr = device->getEngine(aub_stream::EngineType::ENGINE_BCS, EngineUsage::regular).commandStreamReceiver;
    // Downcast within the CSR class hierarchy: static_cast is the correct named cast here.
    auto regularUltCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(regularCsr);
    regularUltCsr->callBaseStopDirectSubmission = false;

    // Phase 1: before the internal BCS engine is created explicitly.
    device->stopDirectSubmissionForCopyEngine();
    EXPECT_FALSE(regularUltCsr->stopDirectSubmissionCalled);

    // Phase 2: internal BCS engine exists, but direct submission is not reported as enabled.
    device->createEngine({aub_stream::EngineType::ENGINE_BCS, EngineUsage::internal});
    device->stopDirectSubmissionForCopyEngine();
    EXPECT_FALSE(regularUltCsr->stopDirectSubmissionCalled);

    // Phase 3: direct submission reported as available on the regular BCS CSR.
    regularUltCsr->blitterDirectSubmissionAvailable = true;
    device->stopDirectSubmissionForCopyEngine();
    EXPECT_TRUE(regularUltCsr->stopDirectSubmissionCalled);
}