Allocate buffers in local memory for PVC multi root device platforms (1/n)

PVC platforms without support for atomic operations on system memory
must always allocate buffers in local memory to avoid atomic access violations.
Note: the feature is being implemented under the new registry key
AllocateBuffersInLocalMemoryForMultiRootDeviceContexts (disabled by default)

Related-To: NEO-7092

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2022-10-24 23:25:04 +00:00
committed by Compute-Runtime-Automation
parent b0c97e49ea
commit 25a5ed0dca
15 changed files with 137 additions and 14 deletions

View File

@ -981,6 +981,9 @@ bool CommandQueue::queueDependenciesClearRequired() const {
bool CommandQueue::blitEnqueueAllowed(const CsrSelectionArgs &args) const {
bool blitEnqueueAllowed = getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() || this->isCopyOnly;
if (this->getContext().getRootDeviceIndices().size() > 1) {
blitEnqueueAllowed &= !DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.get();
}
if (DebugManager.flags.EnableBlitterForEnqueueOperations.get() != -1) {
blitEnqueueAllowed = DebugManager.flags.EnableBlitterForEnqueueOperations.get();
}

View File

@ -440,6 +440,8 @@ Buffer *Buffer::create(Context *context,
}
}
multiGraphicsAllocation.setMultiStorage(MemoryPropertiesHelper::useMultiStorageForCrossRootDeviceAccess(context->getRootDeviceIndices().size() > 1));
auto rootDeviceIndex = context->getDevice(0u)->getRootDeviceIndex();
auto &allocationInfo = allocationInfos[rootDeviceIndex];
auto memoryStorage = multiGraphicsAllocation.getDefaultGraphicsAllocation()->getUnderlyingBuffer();

View File

@ -27,12 +27,17 @@ void MigrationController::handleMigration(Context &context, CommandStreamReceive
if (migrationSyncData->getCurrentLocation() != targetRootDeviceIndex) {
migrateMemory(context, *memoryManager, memObj, targetRootDeviceIndex);
}
migrationSyncData->signalUsage(targetCsr.getTagAddress(), targetCsr.peekTaskCount() + 1);
if (!context.getSpecialQueue(targetRootDeviceIndex)->isWaitForTimestampsEnabled()) {
migrationSyncData->signalUsage(targetCsr.getTagAddress(), targetCsr.peekTaskCount() + 1);
}
}
void MigrationController::migrateMemory(Context &context, MemoryManager &memoryManager, MemObj *memObj, uint32_t targetRootDeviceIndex) {
auto &multiGraphicsAllocation = memObj->getMultiGraphicsAllocation();
auto migrationSyncData = multiGraphicsAllocation.getMigrationSyncData();
if (migrationSyncData->isMigrationInProgress()) {
return;
}
auto sourceRootDeviceIndex = migrationSyncData->getCurrentLocation();
if (sourceRootDeviceIndex == std::numeric_limits<uint32_t>::max()) {

View File

@ -1689,6 +1689,20 @@ TEST(CommandQueue, givenImageToBufferClCommandWhenCallingBlitEnqueueAllowedThenR
EXPECT_FALSE(queue.blitEnqueueAllowed(args));
}
// Checks that CommandQueue::blitEnqueueAllowed() returns false for a CL_COMMAND_READ_BUFFER
// selection when the AllocateBuffersInLocalMemoryForMultiRootDeviceContexts debug flag is set
// and the context reports more than one root device index.
TEST(CommandQueue, givenAllocateBuffersInLocalMemoryForMultiRootDeviceContextsWhenMultiRootDeviceContextIsCreatedThenWhenBlitEnqueueIsNotAllowed) {
// NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
DebugManagerStateRestore restorer;
DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.set(1);
// NOTE(review): the `true` argument presumably requests a multi-root-device mock context;
// the ASSERT_TRUE below guards that assumption.
MockDefaultContext context{true};
MockCommandQueue queue(&context, context.getDevice(0), 0, false);
MockGraphicsAllocation alloc{};
// Precondition: the scenario only applies when the context spans several root devices.
ASSERT_TRUE(context.getRootDeviceIndices().size() > 1);
// The same mock allocation is passed for both allocation arguments.
CsrSelectionArgs args{CL_COMMAND_READ_BUFFER, &alloc, &alloc, 0u, nullptr};
EXPECT_FALSE(queue.blitEnqueueAllowed(args));
}
template <bool blitter, bool selectBlitterWithQueueFamilies>
struct CsrSelectionCommandQueueTests : ::testing::Test {
void SetUp() override {

View File

@ -219,7 +219,7 @@ HWTEST_F(EnqueueReadImageTest, givenGpuHangAndCommandQueueAndPtrCopyAllowedForHo
HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageThenKernelRequiresMigration) {
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);
@ -270,7 +270,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageThen
HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCalledMultipleTimesThenEachKernelUsesDifferentImage) {
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);
@ -352,7 +352,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCa
}
HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueReadImageIsCalledThenCommandQueueIsFlushed) {
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);
@ -374,7 +374,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueRea
HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueReadImageIsCalledThenTlbCacheIsInvalidated) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);
@ -409,7 +409,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueRea
HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCalledToDifferentDevicesThenCorrectLocationIsSet) {
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);
auto pCmdQ2 = createCommandQueue(context.getDevice(1), nullptr, &context);
@ -473,7 +473,7 @@ HWTEST_F(EnqueueReadImageTest, givenMultiRootDeviceImageWhenEnqueueReadImageIsCa
HWTEST_F(EnqueueReadImageTest, givenImageFromBufferThatRequiresMigrationWhenEnqueueReadImageThenBufferObjectIsTakenForMigration) {
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);

View File

@ -590,7 +590,7 @@ HWTEST_F(EnqueueWriteImageTest, whenEnqueueWriteImageThenBuiltinKernelIsResolved
HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageThenKernelRequiresMigration) {
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);
@ -641,7 +641,7 @@ HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageTh
HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageIsCalledMultipleTimesThenEachKernelUsesDifferentImage) {
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);
@ -723,7 +723,7 @@ HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenEnqueueWriteImageIs
}
HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWriteImageIsCalledThenCommandQueueIsFlushed) {
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);
@ -745,7 +745,7 @@ HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWr
HWTEST_F(EnqueueWriteImageTest, givenMultiRootDeviceImageWhenNonBlockedEnqueueWriteImageIsCalledThenTlbCacheIsInvalidated) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
MockDefaultContext context;
MockDefaultContext context{true};
auto pCmdQ1 = createCommandQueue(context.getDevice(0), nullptr, &context);

View File

@ -557,3 +557,43 @@ TEST_F(MemoryPropertiesHelperTests, givenSubDeviceIdWhenParsingExtraMemoryProper
EXPECT_EQ(0b10u, memoryProperties.pDevice->getDeviceBitfield().to_ulong());
EXPECT_EQ(&context.pSubDevice1->getDevice(), memoryProperties.pDevice);
}
// No debug flag is overridden here: the query result is expected to equal its argument,
// i.e. false for a single-root-device context and true for a multi-root-device context.
TEST_F(MemoryPropertiesHelperTests, whenQueryingUseSystemMemoryForCrossRootDeviceAccessThenReturnTrueForMultiRootDeviceContexts) {
    const bool contextKinds[] = {false, true};
    for (const bool isMultiRootDeviceContext : contextKinds) {
        const auto useSystemMemory = MemoryPropertiesHelper::useSystemMemoryForCrossRootDeviceAccess(isMultiRootDeviceContext);
        EXPECT_EQ(isMultiRootDeviceContext, useSystemMemory);
    }
}
// Exercises useSystemMemoryForCrossRootDeviceAccess() for both values of the
// AllocateBuffersInLocalMemoryForMultiRootDeviceContexts debug flag.
TEST_F(MemoryPropertiesHelperTests, givenAllocateBuffersInLocalMemoryForMultiRootDeviceContextsWhenQueryingUseSystemMemoryForCrossRootDeviceAccessThenReturnFalseForMultiRootDeviceContexts) {
    DebugManagerStateRestore restore;
    const bool flagValues[] = {false, true};

    // Argument false: the result is expected to be false for either flag value.
    for (const bool allocateInLocalMemory : flagValues) {
        DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.set(allocateInLocalMemory);
        const auto useSystemMemory = MemoryPropertiesHelper::useSystemMemoryForCrossRootDeviceAccess(false);
        EXPECT_FALSE(useSystemMemory);
    }

    // Argument true: the result is expected to differ from the flag value.
    for (const bool allocateInLocalMemory : flagValues) {
        DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.set(allocateInLocalMemory);
        const auto useSystemMemory = MemoryPropertiesHelper::useSystemMemoryForCrossRootDeviceAccess(true);
        EXPECT_NE(allocateInLocalMemory, useSystemMemory);
    }
}
// No debug flag is overridden here: the multi-storage query is expected to return false
// for both argument values.
TEST_F(MemoryPropertiesHelperTests, whenQueryingUseMultiStorageForCrossRootDeviceAccessThenReturnFalseForMultiRootDeviceContexts) {
    const bool contextKinds[] = {false, true};
    for (const bool isMultiRootDeviceContext : contextKinds) {
        const auto useMultiStorage = MemoryPropertiesHelper::useMultiStorageForCrossRootDeviceAccess(isMultiRootDeviceContext);
        EXPECT_FALSE(useMultiStorage);
    }
}
// Exercises useMultiStorageForCrossRootDeviceAccess() for both values of the
// AllocateBuffersInLocalMemoryForMultiRootDeviceContexts debug flag.
TEST_F(MemoryPropertiesHelperTests, givenAllocateBuffersInLocalMemoryForMultiRootDeviceContextsWhenQueryingUseMultiStorageForCrossRootDeviceAccessThenReturnTrueForMultiRootDeviceContexts) {
    DebugManagerStateRestore restore;
    const bool flagValues[] = {false, true};

    // Argument false: the result is expected to be false for either flag value.
    for (const bool allocateInLocalMemory : flagValues) {
        DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.set(allocateInLocalMemory);
        const auto useMultiStorage = MemoryPropertiesHelper::useMultiStorageForCrossRootDeviceAccess(false);
        EXPECT_FALSE(useMultiStorage);
    }

    // Argument true: the result is expected to equal the flag value.
    for (const bool allocateInLocalMemory : flagValues) {
        DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.set(allocateInLocalMemory);
        const auto useMultiStorage = MemoryPropertiesHelper::useMultiStorageForCrossRootDeviceAccess(true);
        EXPECT_EQ(allocateInLocalMemory, useMultiStorage);
    }
}

View File

@ -213,3 +213,42 @@ TEST_F(MigrationControllerTests, whenHandleMigrationThenProperTagAddressAndTaskC
EXPECT_EQ(pCsr0->getTagAddress(), migrationSyncData->tagAddress);
EXPECT_EQ(pCsr0->peekTaskCount() + 1, migrationSyncData->latestTaskCountUsed);
}
// Checks that MigrationController::handleMigration() leaves the mock sync data's
// signalUsageCalled at 0 when EnableTimestampWaitForQueues is set to 4.
// NOTE(review): presumably this flag value makes the special queue report
// isWaitForTimestampsEnabled() == true - confirm against the queue implementation.
TEST_F(MigrationControllerTests, givenWaitForTimestampsEnabledWhenHandleMigrationIsCalledThenDontSignalTaskCountBasedUsage) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableTimestampWaitForQueues.set(4);
// Replace the sync-data factory so the buffer created below gets a MockMigrationSyncData,
// which is what the static_cast further down relies on.
// NOTE(review): VariableBackup presumably restores the original factory at scope exit - confirm.
VariableBackup<decltype(MultiGraphicsAllocation::createMigrationSyncDataFunc)> createFuncBackup{&MultiGraphicsAllocation::createMigrationSyncDataFunc};
MultiGraphicsAllocation::createMigrationSyncDataFunc = [](size_t size) -> MigrationSyncData * {
return new MockMigrationSyncData(size);
};
std::unique_ptr<Buffer> pBuffer(BufferHelper<>::create(&context));
// Multi storage is switched on explicitly; the ASSERT below checks that the
// allocation then reports that it requires migrations.
const_cast<MultiGraphicsAllocation &>(pBuffer->getMultiGraphicsAllocation()).setMultiStorage(true);
ASSERT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations());
auto migrationSyncData = static_cast<MockMigrationSyncData *>(pBuffer->getMultiGraphicsAllocation().getMigrationSyncData());
MigrationController::handleMigration(context, *pCsr0, pBuffer.get());
EXPECT_EQ(0u, migrationSyncData->signalUsageCalled);
}
// Checks that calling MigrationController::migrateMemory() while the sync data already
// reports a migration in progress leaves that in-progress state set.
TEST_F(MigrationControllerTests, whenMemoryMigrationForMemoryObjectIsAlreadyInProgressThenDoEarlyReturn) {
DebugManagerStateRestore restorer;
DebugManager.flags.AllocateBuffersInLocalMemoryForMultiRootDeviceContexts.set(1);
std::unique_ptr<Buffer> pBuffer(BufferHelper<>::create(&context));
ASSERT_TRUE(pBuffer->getMultiGraphicsAllocation().requiresMigrations());
// NOTE(review): this test does not install a MockMigrationSyncData factory itself, so the
// cast assumes the fixture (or the mock's layout) makes it valid - confirm.
auto migrationSyncData = static_cast<MockMigrationSyncData *>(pBuffer->getMultiGraphicsAllocation().getMigrationSyncData());
// Put the sync data into the "migration in progress" state before invoking the controller.
migrationSyncData->startMigration();
EXPECT_TRUE(migrationSyncData->isMigrationInProgress());
MigrationController::migrateMemory(context, *memoryManager, pBuffer.get(), pCsr1->getRootDeviceIndex());
// Still in progress afterwards: the call did not clear the in-progress state.
EXPECT_TRUE(migrationSyncData->isMigrationInProgress());
}