From bd1ac55e2c647f42d4b02cea8226164ce6d41d02 Mon Sep 17 00:00:00 2001 From: Michal Mrozek Date: Fri, 21 Feb 2020 08:25:43 +0100 Subject: [PATCH] [1/n] Refactor CPU copies. - remove waitlist check from buffer - refactor the flow in CommandQueue::bufferCpuCopyAllowed Change-Id: I52bb7f886211b05f80118665bb28dfdb5f113fe7 --- runtime/command_queue/command_queue.cpp | 23 +++++++++++-- runtime/mem_obj/buffer.cpp | 4 +-- runtime/mem_obj/buffer.h | 2 +- .../read_write_buffer_cpu_copy.cpp | 32 +++++++++---------- .../sharings/gl/windows/gl_sharing_tests.cpp | 2 +- 5 files changed, 39 insertions(+), 24 deletions(-) diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index a9d9d33bfa..aef15bf1cb 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -553,9 +553,26 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT bool debugVariableSet = (CL_COMMAND_READ_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnReadBuffer.get()) || (CL_COMMAND_WRITE_BUFFER == commandType && DebugManager.flags.DoCpuCopyOnWriteBuffer.get()); - return (debugVariableSet && !Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList) && - buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) || - buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size); + //if we are blocked by user events, we can't service the call on CPU + if (Event::checkUserEventDependencies(numEventsInWaitList, eventWaitList)) { + return false; + } + + if (debugVariableSet && buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) { + return true; + } + + //check if buffer is compatible + if (!buffer->isReadWriteOnCpuAllowed(blocking, ptr, size)) { + return false; + } + + //make sure that event wait list is empty + if (numEventsInWaitList == 0) { + return true; + } + + 
return false; } bool CommandQueue::queueDependenciesClearRequired() const { diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index b3f13a7d4a..260b8fd1a6 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -501,8 +501,8 @@ size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region, return hostPtrSize; } -bool Buffer::isReadWriteOnCpuAllowed(cl_bool blocking, cl_uint numEventsInWaitList, void *ptr, size_t size) { - return (blocking == CL_TRUE && numEventsInWaitList == 0 && !forceDisallowCPUCopy) && graphicsAllocation->peekSharedHandle() == 0 && +bool Buffer::isReadWriteOnCpuAllowed(cl_bool blocking, void *ptr, size_t size) { + return (blocking == CL_TRUE && !forceDisallowCPUCopy) && graphicsAllocation->peekSharedHandle() == 0 && (isMemObjZeroCopy() || (reinterpret_cast(ptr) & (MemoryConstants::cacheLineSize - 1)) != 0) && (!context->getDevice(0)->getDeviceInfo().platformLP || (size <= maxBufferSizeForReadWriteOnCpu)) && !(graphicsAllocation->getDefaultGmm() && graphicsAllocation->getDefaultGmm()->isRenderCompressed) && diff --git a/runtime/mem_obj/buffer.h b/runtime/mem_obj/buffer.h index 4bd041c0e2..2cb100895e 100644 --- a/runtime/mem_obj/buffer.h +++ b/runtime/mem_obj/buffer.h @@ -135,7 +135,7 @@ class Buffer : public MemObj { void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override; void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override; - bool isReadWriteOnCpuAllowed(cl_bool blocking, cl_uint numEventsInWaitList, void *ptr, size_t size); + bool isReadWriteOnCpuAllowed(cl_bool blocking, void *ptr, size_t size); uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const; diff --git a/unit_tests/command_queue/read_write_buffer_cpu_copy.cpp b/unit_tests/command_queue/read_write_buffer_cpu_copy.cpp index bae0cad0b9..37948a5dc8 100644 --- a/unit_tests/command_queue/read_write_buffer_cpu_copy.cpp +++ 
b/unit_tests/command_queue/read_write_buffer_cpu_copy.cpp @@ -23,10 +23,10 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, givenRenderCompressedGmmWhenAskingForCpuOpe auto alignedPtr = alignedMalloc(2, MemoryConstants::cacheLineSize); auto unalignedPtr = ptrOffset(alignedPtr, 1); - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, unalignedPtr, 1)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedPtr, 1)); gmm->isRenderCompressed = true; - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, unalignedPtr, 1)); + EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedPtr, 1)); alignedFree(alignedPtr); } @@ -49,7 +49,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedReadPtrWhenReadingBufferThenM bool aligned = (reinterpret_cast(unalignedReadPtr) & (MemoryConstants::cacheLineSize - 1)) == 0; EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy()); - ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, unalignedReadPtr, size)); + ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedReadPtr, size)); retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), @@ -89,7 +89,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedSrcPtrWhenWritingBufferThenMe bool aligned = (reinterpret_cast(unalignedWritePtr) & (MemoryConstants::cacheLineSize - 1)) == 0; EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy()); - ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, unalignedWritePtr, size)); + ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedWritePtr, size)); retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, buffer.get(), @@ -134,31 +134,31 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri EXPECT_TRUE(buffer->isMemObjZeroCopy()); // zeroCopy == true && aligned/unaligned hostPtr - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, alignedHostPtr, MemoryConstants::cacheLineSize + 1)); - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, unalignedHostPtr, 
MemoryConstants::cacheLineSize)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, alignedHostPtr, MemoryConstants::cacheLineSize + 1)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedHostPtr, MemoryConstants::cacheLineSize)); buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); // zeroCopy == false && unaligned hostPtr - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, unalignedHostPtr, MemoryConstants::cacheLineSize)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedHostPtr, MemoryConstants::cacheLineSize)); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_USE_HOST_PTR, 1 * MB, smallBufferPtr, retVal)); // platform LP == true && size <= 10 MB mockDevice->deviceInfo.platformLP = true; - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, smallBufferPtr, 1 * MB)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, smallBufferPtr, 1 * MB)); // platform LP == false && size <= 10 MB mockDevice->deviceInfo.platformLP = false; - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, smallBufferPtr, 1 * MB)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, smallBufferPtr, 1 * MB)); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal)); // platform LP == false && size > 10 MB mockDevice->deviceInfo.platformLP = false; - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, buffer->getCpuAddress(), largeBufferSize)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, buffer->getCpuAddress(), largeBufferSize)); alignedFree(smallBufferPtr); alignedFree(alignedHostPtr); @@ -184,22 +184,20 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri EXPECT_TRUE(buffer->isMemObjZeroCopy()); // non blocking - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_FALSE, 0, unalignedHostPtr, MemoryConstants::cacheLineSize)); - // numEventWaitlist > 0 - 
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 1, unalignedHostPtr, MemoryConstants::cacheLineSize)); + EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_FALSE, unalignedHostPtr, MemoryConstants::cacheLineSize)); buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); // zeroCopy == false && aligned hostPtr - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, alignedHostPtr, MemoryConstants::cacheLineSize + 1)); + EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, alignedHostPtr, MemoryConstants::cacheLineSize + 1)); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal)); // platform LP == true && size > 10 MB mockDevice->deviceInfo.platformLP = true; - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, buffer->getCpuAddress(), largeBufferSize)); + EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, buffer->getCpuAddress(), largeBufferSize)); alignedFree(alignedHostPtr); alignedFree(alignedBufferPtr); @@ -218,7 +216,7 @@ TEST(ReadWriteBufferOnCpu, givenNoHostPtrAndAlignedSizeWhenMemoryAllocationIsInN std::unique_ptr buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, reinterpret_cast(0x1000), MemoryConstants::pageSize)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, reinterpret_cast(0x1000), MemoryConstants::pageSize)); reinterpret_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, 0, reinterpret_cast(0x1000), MemoryConstants::pageSize)); + EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, reinterpret_cast(0x1000), MemoryConstants::pageSize)); } diff --git a/unit_tests/sharings/gl/windows/gl_sharing_tests.cpp b/unit_tests/sharings/gl/windows/gl_sharing_tests.cpp index d6962ca237..1fcaebc59a 100644 
--- a/unit_tests/sharings/gl/windows/gl_sharing_tests.cpp +++ b/unit_tests/sharings/gl/windows/gl_sharing_tests.cpp @@ -212,7 +212,7 @@ TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredThenAcuqireCountIsIncremen auto memObject = castToObject(glBuffer); EXPECT_FALSE(memObject->isMemObjZeroCopy()); - EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed(true, 0, (void *)0x1001, 100)); + EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed(true, (void *)0x1001, 100)); auto currentGraphicsAllocation = memObject->getGraphicsAllocation(); memObject->peekSharingHandler()->acquire(memObject);