[perf] Prefetch of kmd-migrated shared allocation with initial placement on GPU

Related-To: NEO-7646

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir 2023-04-25 13:54:13 +00:00 committed by Compute-Runtime-Automation
parent 06bd405e88
commit c9fdeb200c
5 changed files with 117 additions and 0 deletions

View File

@ -2217,6 +2217,21 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
[[maybe_unused]] auto success = allocation->setMemAdvise(&drm, memAdviseFlags);
DEBUG_BREAK_IF(!success);
if (allocationData.usmInitialPlacement == GraphicsAllocation::UsmInitialPlacement::GPU) {
// Collects, in ascending order, the index of every bit that is set in the
// given sub-device bitfield and returns them as a SubDeviceIdsVec.
auto getSubDeviceIds = [](const DeviceBitfield &subDeviceBitfield) {
    SubDeviceIdsVec ids;
    for (uint32_t bit = 0u; bit < subDeviceBitfield.size(); ++bit) {
        if (!subDeviceBitfield.test(bit)) {
            continue; // bit not set, nothing to record for this index
        }
        ids.push_back(bit);
    }
    return ids;
};
auto subDeviceIds = getSubDeviceIds(allocationData.storageInfo.subDeviceBitfield);
success = setMemPrefetch(allocation.get(), subDeviceIds, allocationData.rootDeviceIndex);
DEBUG_BREAK_IF(!success);
}
return allocation.release();
}

View File

@ -240,6 +240,12 @@ int DrmMockPrelimContext::handlePrelimRequest(DrmIoctl request, void *arg) {
}
return vmAdviseReturn;
} break;
case DrmIoctl::GemVmPrefetch: {
const auto req = static_cast<prelim_drm_i915_gem_vm_prefetch *>(arg);
vmPrefetchCalled++;
receivedVmPrefetch.push_back(VmPrefetch{req->vm_id, req->region});
return 0;
} break;
case DrmIoctl::UuidRegister: {
auto uuidControl = reinterpret_cast<prelim_drm_i915_uuid_control *>(arg);

View File

@ -78,6 +78,11 @@ struct VmAdvise {
MemoryClassInstance memoryRegions{0};
};
// Plain record holding the two values of a vm prefetch request:
// a vm id and a region value. Both default to zero.
struct VmPrefetch {
    uint32_t vmId = 0u;
    uint32_t region = 0u;
};
struct UuidVmBindExt {
uint32_t handle{0};
uint64_t nextExtension{0};
@ -130,6 +135,9 @@ struct DrmMockPrelimContext {
std::optional<VmAdvise> receivedVmAdvise[2]{};
int vmAdviseReturn{0};
size_t vmPrefetchCalled{0};
std::vector<VmPrefetch> receivedVmPrefetch{};
int mmapOffsetReturn{0};
uint32_t uuidHandle{1};

View File

@ -18,6 +18,42 @@
#include "shared/test/common/os_interface/linux/drm_memory_manager_fixture.h"
#include "shared/test/common/os_interface/linux/drm_mock_memory_info.h"
// Test fixture that assembles an execution environment with one root device,
// a DrmQueryMock installed as its driver model, and a TestedDrmMemoryManager.
//
// Template parameter multipleSubDevices selects the value written to the
// CreateMultipleSubDevices debug flag in SetUp(): 2 when true, 1 when false.
template <bool multipleSubDevices>
class DrmMemoryManagerWithSubDevicesPrelimTest : public ::testing::Test {
  public:
    void SetUp() override {
        constexpr auto subDeviceCount = multipleSubDevices ? 2 : 1;
        DebugManager.flags.CreateMultipleSubDevices.set(subDeviceCount);

        executionEnvironment = new ExecutionEnvironment();
        executionEnvironment->prepareRootDeviceEnvironments(1);

        auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[rootDeviceIndex];
        rootDeviceEnvironment.setHwInfoAndInitHelpers(defaultHwInfo.get());

        // The mock needs its memory info in place before engine info is queried.
        mock = new DrmQueryMock(rootDeviceEnvironment);
        mock->memoryInfo.reset(new MockExtendedMemoryInfo(*mock));
        mock->queryEngineInfo();

        // The mock is handed to a unique_ptr here; the raw `mock` member is kept for test access.
        rootDeviceEnvironment.osInterface = std::make_unique<OSInterface>();
        rootDeviceEnvironment.osInterface->setDriverModel(std::unique_ptr<DriverModel>(mock));
        rootDeviceEnvironment.memoryOperationsInterface = DrmMemoryOperationsHandler::create(*mock, 0u);

        // Likewise the memory manager is handed to the execution environment via reset().
        memoryManager = new TestedDrmMemoryManager(true, false, false, *executionEnvironment);
        executionEnvironment->memoryManager.reset(memoryManager);

        device.reset(MockDevice::createWithExecutionEnvironment<MockDevice>(defaultHwInfo.get(), executionEnvironment, rootDeviceIndex));
    }

  protected:
    DebugManagerStateRestore restorer{};
    // NOTE(review): presumably released together with `device` - confirm ownership.
    ExecutionEnvironment *executionEnvironment{nullptr};
    DrmQueryMock *mock{nullptr};
    std::unique_ptr<MockDevice> device;
    TestedDrmMemoryManager *memoryManager{nullptr};
    constexpr static uint32_t rootDeviceIndex{0u};
};
class DrmMemoryManagerLocalMemoryPrelimTest : public ::testing::Test {
public:
void SetUp() override {

View File

@ -776,6 +776,58 @@ TEST_F(DrmMemoryManagerLocalMemoryPrelimTest, givenKMDSupportForCrossTileMigrati
unifiedMemoryManager.freeSVMAlloc(ptr);
}
using DrmMemoryManagerWithSingleSubDevicePrelimTest = DrmMemoryManagerWithSubDevicesPrelimTest<false>;

// Allocates unified shared memory on sub-device bit 0 with GPU initial placement and
// checks that the mock recorded exactly one vm bind and one vm prefetch, the latter
// with vm id 1 and a region of (I915_MEMORY_CLASS_DEVICE << 16 | 0).
TEST_F(DrmMemoryManagerWithSingleSubDevicePrelimTest, givenUnifiedMemoryAllocationOnSingleSubDeviceWhenCreatedWithInitialPlacementOnGpuThenCallMemoryPrefetch) {
    DeviceBitfield singleSubDevice = 0b01;
    AllocationProperties properties{0u,
                                    MemoryConstants::pageSize64k,
                                    AllocationType::UNIFIED_SHARED_MEMORY,
                                    singleSubDevice};
    properties.alignment = 2 * MemoryConstants::megaByte;
    properties.usmInitialPlacement = GraphicsAllocation::UsmInitialPlacement::GPU;

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    ASSERT_NE(allocation, nullptr);

    const auto &context = mock->context;
    EXPECT_EQ(context.vmBindCalled, 1u);
    EXPECT_EQ(context.vmPrefetchCalled, 1u);

    const auto &prefetches = context.receivedVmPrefetch;
    ASSERT_EQ(prefetches.size(), 1u);

    const auto expectedRegion = static_cast<uint32_t>(drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE << 16 | 0u);
    EXPECT_EQ(prefetches[0].vmId, 1u);
    EXPECT_EQ(prefetches[0].region, expectedRegion);

    memoryManager->freeGraphicsMemory(allocation);
}
using DrmMemoryManagerWithMultipleSubDevicesPrelimTest = DrmMemoryManagerWithSubDevicesPrelimTest<true>;

// Allocates unified shared memory across sub-device bits 0 and 1 with GPU initial
// placement and checks that the mock recorded four vm binds and four vm prefetches.
// The prefetches are expected in this order: vm ids 1 and 2 with the device-class
// region OR-ed with 0, then vm ids 1 and 2 with the device-class region OR-ed with 1.
TEST_F(DrmMemoryManagerWithMultipleSubDevicesPrelimTest, givenUnifiedMemoryAllocationOnMultipleSubdevicesWhenCreatedWithInitialPlacementIsOnGpuThenCallVmPrefetchCorrectly) {
    DeviceBitfield bothSubDevices = 0b11;
    AllocationProperties properties{0u,
                                    2 * MemoryConstants::pageSize64k,
                                    AllocationType::UNIFIED_SHARED_MEMORY,
                                    bothSubDevices};
    properties.alignment = 2 * MemoryConstants::megaByte;
    properties.usmInitialPlacement = GraphicsAllocation::UsmInitialPlacement::GPU;

    auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);
    ASSERT_NE(allocation, nullptr);

    const auto &context = mock->context;
    EXPECT_EQ(context.vmBindCalled, 4u);
    EXPECT_EQ(context.vmPrefetchCalled, 4u);

    const auto &prefetches = context.receivedVmPrefetch;
    ASSERT_EQ(prefetches.size(), 4u);

    const auto regionDevice0 = static_cast<uint32_t>(drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE << 16 | 0u);
    const auto regionDevice1 = static_cast<uint32_t>(drm_i915_gem_memory_class::I915_MEMORY_CLASS_DEVICE << 16 | 1u);

    EXPECT_EQ(prefetches[0].vmId, 1u);
    EXPECT_EQ(prefetches[0].region, regionDevice0);
    EXPECT_EQ(prefetches[1].vmId, 2u);
    EXPECT_EQ(prefetches[1].region, regionDevice0);

    EXPECT_EQ(prefetches[2].vmId, 1u);
    EXPECT_EQ(prefetches[2].region, regionDevice1);
    EXPECT_EQ(prefetches[3].vmId, 2u);
    EXPECT_EQ(prefetches[3].region, regionDevice1);

    memoryManager->freeGraphicsMemory(allocation);
}
TEST_F(DrmMemoryManagerLocalMemoryPrelimTest, whenVmAdviseIoctlFailsThenCreateSharedUnifiedMemoryAllocationReturnsNullptr) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseKmdMigration.set(1);