Enable memory transfer for buffer rect operations

Unlock the flow for multi-device setups in:
- enqueueCopyBufferRect
- enqueueReadBufferRect
- enqueueWriteBufferRect

Related-To: NEO-4589
Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>
This commit is contained in:
Krzysztof Gibala 2020-11-20 15:38:07 +01:00 committed by Compute-Runtime-Automation
parent 9f96552007
commit 84f3d44968
6 changed files with 108 additions and 5 deletions

View File

@ -33,6 +33,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueCopyBufferRect(
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
srcBuffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
dstBuffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
auto eBuiltInOps = EBuiltInOps::CopyBufferRect; auto eBuiltInOps = EBuiltInOps::CopyBufferRect;
if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) { if (forceStateless(std::max(srcBuffer->getSize(), dstBuffer->getSize()))) {
eBuiltInOps = EBuiltInOps::CopyBufferRectStateless; eBuiltInOps = EBuiltInOps::CopyBufferRectStateless;

View File

@ -35,6 +35,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT; const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT;
auto isMemTransferNeeded = true; auto isMemTransferNeeded = true;
if (buffer->isMemObjZeroCopy()) { if (buffer->isMemObjZeroCopy()) {

View File

@ -34,6 +34,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBufferRect(
const cl_event *eventWaitList, const cl_event *eventWaitList,
cl_event *event) { cl_event *event) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT; const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER_RECT;
auto isMemTransferNeeded = true; auto isMemTransferNeeded = true;
if (buffer->isMemObjZeroCopy()) { if (buffer->isMemObjZeroCopy()) {

View File

@ -35,7 +35,6 @@ class MultiRootDeviceFixture : public ::testing::Test {
} }
const uint32_t expectedRootDeviceIndex = 1; const uint32_t expectedRootDeviceIndex = 1;
DebugManagerStateRestore restorer;
std::unique_ptr<UltClDeviceFactory> deviceFactory; std::unique_ptr<UltClDeviceFactory> deviceFactory;
MockClDevice *device1 = nullptr; MockClDevice *device1 = nullptr;
MockClDevice *device2 = nullptr; MockClDevice *device2 = nullptr;

View File

@ -1830,7 +1830,7 @@ TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedThenBufferGraphicsAllocatio
EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex()); EXPECT_EQ(expectedRootDeviceIndex, graphicsAllocation->getRootDeviceIndex());
} }
TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedAndEnqueueWriteCalledThenBufferMultiGraphicsAllocationLastUsedRootDeviceIndexHasCorrectRootDeviceIndex) { TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedAndEnqueueWriteBufferCalledThenBufferMultiGraphicsAllocationLastUsedRootDeviceIndexHasCorrectRootDeviceIndex) {
cl_int retVal = 0; cl_int retVal = 0;
cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_flags flags = CL_MEM_READ_WRITE;
@ -1856,7 +1856,7 @@ TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedAndEnqueueWriteCalledThenBu
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u); EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
} }
TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedAndEnqueueReadCalledThenBufferMultiGraphicsAllocationLastUsedRootDeviceIndexHasCorrectRootDeviceIndex) { TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedAndEnqueueReadBufferCalledThenBufferMultiGraphicsAllocationLastUsedRootDeviceIndexHasCorrectRootDeviceIndex) {
cl_int retVal = 0; cl_int retVal = 0;
cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_flags flags = CL_MEM_READ_WRITE;
@ -1913,6 +1913,97 @@ TEST_F(MultiRootDeviceBufferTest, WhenBuffersAreCreatedAndEnqueueCopyBufferCalle
EXPECT_EQ(buffer2->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u); EXPECT_EQ(buffer2->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
} }
TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedAndEnqueueWriteBufferRectCalledThenBufferMultiGraphicsAllocationLastUsedRootDeviceIndexHasCorrectRootDeviceIndex) {
cl_int retVal = 0;
cl_mem_flags flags = CL_MEM_READ_WRITE;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {0, 0, 0};
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, MemoryConstants::pageSize, nullptr, retVal));
void *ptr = buffer->getCpuAddressForMemoryTransfer();
auto cmdQ1 = context->getSpecialQueue(1u);
cmdQ1->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
cmdQ1->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
auto cmdQ2 = context->getSpecialQueue(2u);
cmdQ2->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
cmdQ1->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
static_cast<MemoryAllocation *>(buffer->getMigrateableMultiGraphicsAllocation().getGraphicsAllocation(2u))->overrideMemoryPool(MemoryPool::LocalMemory);
cmdQ2->enqueueWriteBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
}
TEST_F(MultiRootDeviceBufferTest, WhenBufferIsCreatedAndEnqueueReadBufferRectCalledThenBufferMultiGraphicsAllocationLastUsedRootDeviceIndexHasCorrectRootDeviceIndex) {
cl_int retVal = 0;
cl_mem_flags flags = CL_MEM_READ_WRITE;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {0, 0, 0};
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), flags, MemoryConstants::pageSize, nullptr, retVal));
void *ptr = buffer->getCpuAddressForMemoryTransfer();
auto cmdQ1 = context->getSpecialQueue(1u);
cmdQ1->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
cmdQ1->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
auto cmdQ2 = context->getSpecialQueue(2u);
cmdQ2->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
cmdQ1->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
static_cast<MemoryAllocation *>(buffer->getMigrateableMultiGraphicsAllocation().getGraphicsAllocation(2u))->overrideMemoryPool(MemoryPool::LocalMemory);
cmdQ2->enqueueReadBufferRect(buffer.get(), CL_FALSE, bufferOrigin, hostOrigin, region, 0, 0, 0, 0, ptr, 0, nullptr, nullptr);
EXPECT_EQ(buffer->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
}
TEST_F(MultiRootDeviceBufferTest, WhenBuffersAreCreatedAndEnqueueCopyBufferRectCalledThenBuffersMultiGraphicsAllocationLastUsedRootDeviceIndexHasCorrectRootDeviceIndex) {
cl_int retVal = 0;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {0, 0, 0};
std::unique_ptr<Buffer> buffer1(Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal));
std::unique_ptr<Buffer> buffer2(Buffer::create(context.get(), 0, MemoryConstants::pageSize, nullptr, retVal));
auto cmdQ1 = context->getSpecialQueue(1u);
cmdQ1->enqueueCopyBufferRect(buffer1.get(), buffer2.get(), bufferOrigin, hostOrigin, region, 0, 0, 0, 0, 0, nullptr, nullptr);
EXPECT_EQ(buffer1->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
EXPECT_EQ(buffer2->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
cmdQ1->enqueueCopyBufferRect(buffer1.get(), buffer2.get(), bufferOrigin, hostOrigin, region, 0, 0, 0, 0, 0, nullptr, nullptr);
EXPECT_EQ(buffer1->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
EXPECT_EQ(buffer2->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
auto cmdQ2 = context->getSpecialQueue(2u);
cmdQ2->enqueueCopyBufferRect(buffer1.get(), buffer2.get(), bufferOrigin, hostOrigin, region, 0, 0, 0, 0, 0, nullptr, nullptr);
EXPECT_EQ(buffer1->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
EXPECT_EQ(buffer2->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
cmdQ1->enqueueCopyBufferRect(buffer1.get(), buffer2.get(), bufferOrigin, hostOrigin, region, 0, 0, 0, 0, 0, nullptr, nullptr);
EXPECT_EQ(buffer1->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
EXPECT_EQ(buffer2->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 1u);
static_cast<MemoryAllocation *>(buffer1->getMigrateableMultiGraphicsAllocation().getGraphicsAllocation(2u))->overrideMemoryPool(MemoryPool::LocalMemory);
cmdQ2->enqueueCopyBufferRect(buffer1.get(), buffer2.get(), bufferOrigin, hostOrigin, region, 0, 0, 0, 0, 0, nullptr, nullptr);
EXPECT_EQ(buffer1->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
EXPECT_EQ(buffer2->getMultiGraphicsAllocation().getLastUsedRootDeviceIndex(), 2u);
}
TEST_F(MultiRootDeviceBufferTest, givenBufferWhenGetSurfaceSizeCalledWithoutAlignSizeForAuxTranslationThenCorrectValueReturned) { TEST_F(MultiRootDeviceBufferTest, givenBufferWhenGetSurfaceSizeCalledWithoutAlignSizeForAuxTranslationThenCorrectValueReturned) {
cl_int retVal = 0; cl_int retVal = 0;
cl_mem_flags flags = CL_MEM_READ_WRITE; cl_mem_flags flags = CL_MEM_READ_WRITE;

View File

@ -273,9 +273,9 @@ TEST(DiscoverDevices, whenDiscoverDevicesAndForceDeviceIdIsDifferentFromTheExist
EXPECT_FALSE(result); EXPECT_FALSE(result);
} }
using UltClDeviceFactoryTest = DeviceFactoryTest; using UltDeviceFactoryTest = DeviceFactoryTest;
TEST_F(UltClDeviceFactoryTest, givenUltClDeviceFactoryPrepareDeviceEnvironmentsCallWhenItIsDoneThenMockMemoryManagerIsAllocated) { TEST_F(UltDeviceFactoryTest, givenUltDeviceFactoryPrepareDeviceEnvironmentsCallWhenItIsDoneThenMockMemoryManagerIsAllocated) {
UltDeviceFactory::prepareDeviceEnvironments(*executionEnvironment, 2u); UltDeviceFactory::prepareDeviceEnvironments(*executionEnvironment, 2u);
EXPECT_EQ(2u, executionEnvironment->rootDeviceEnvironments.size()); EXPECT_EQ(2u, executionEnvironment->rootDeviceEnvironments.size());