# Patch notes (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# Purpose: split Buffer's single CPU read/write query into two separate questions.
#
#   * Buffer::isReadWriteOnCpuAllowed()  -- now takes no arguments.
#     The hunks shown return false when forceDisallowCPUCopy is set or when
#     graphicsAllocation->peekSharedHandle() != 0, and the function now ends with
#     "return true". The lines between the two buffer.cpp hunks are not part of this
#     patch; the first unit test expects the result to turn false once
#     gmm->isRenderCompressed is set to true.
#
#   * Buffer::isReadWriteOnCpuPreffered(void *ptr, size_t size)  -- new method.
#     Returns false when any of the following holds, otherwise true:
#       - the buffer is not zero-copy and ptr is aligned to MemoryConstants::cacheLineSize;
#       - deviceInfo.platformLP is set and size > maxBufferSizeForReadWriteOnCpu;
#       - the allocation's memory pool is not a system memory pool.
#     These three guards are the negation of the three-term "return (...) && (...) && (...)"
#     expression that the patch removes from isReadWriteOnCpuAllowed.
#
# Per-file changes:
#   * runtime/command_queue/command_queue.cpp (CommandQueue::bufferCpuCopyAllowed):
#     the explicit buffer->isCompressed() check is replaced by !buffer->isReadWriteOnCpuAllowed(),
#     and the later isReadWriteOnCpuAllowed(ptr, size) check is replaced by
#     !buffer->isReadWriteOnCpuPreffered(ptr, size).
#   * runtime/mem_obj/buffer.cpp / buffer.h: definition and declaration of the two methods above.
#   * unit_tests/command_queue/read_write_buffer_cpu_copy.cpp: expectations updated to the two-method
#     API. Note the changed expectation in the last test: after the memory pool is overridden to
#     MemoryPool::SystemCpuInaccessible, isReadWriteOnCpuAllowed() is expected to stay true while
#     isReadWriteOnCpuPreffered() is expected to be false (previously the combined check was false).
#   * unit_tests/sharings/gl/windows/gl_sharing_tests.cpp: call updated to the argument-less
#     isReadWriteOnCpuAllowed().
#
# NOTE(review): "Preffered" looks like a misspelling of "Preferred". It is part of the new method
#   name used at every call site in this patch, so a rename would have to be applied consistently.
# NOTE(review): this copy of the patch has its line breaks collapsed onto a few long lines, and some
#   text looks damaged -- reinterpret_cast(...) without a template argument, std::unique_ptr without
#   an element type, castToObject(...) without a type, and "©Size" / "©Offset" in the buffer.h
#   context lines (presumably "&copySize" / "&copyOffset"). Confirm against the original commit
#   before trying to apply it.
diff --git a/runtime/command_queue/command_queue.cpp b/runtime/command_queue/command_queue.cpp index 0533d8a42e..7dbd942da5 100644 --- a/runtime/command_queue/command_queue.cpp +++ b/runtime/command_queue/command_queue.cpp @@ -562,8 +562,8 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT return false; } - //if buffer is compressed we cannot do CPU copy - if (buffer->isCompressed()) { + //check if buffer is compatible + if (!buffer->isReadWriteOnCpuAllowed()) { return false; } @@ -577,8 +577,8 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT return false; } - //check if buffer is compatible - if (!buffer->isReadWriteOnCpuAllowed(ptr, size)) { + //check if it is beneficial to do transfer on CPU + if (!buffer->isReadWriteOnCpuPreffered(ptr, size)) { return false; } diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index 4f6406436a..9635751757 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -501,7 +501,7 @@ size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region, return hostPtrSize; } -bool Buffer::isReadWriteOnCpuAllowed(void *ptr, size_t size) { +bool Buffer::isReadWriteOnCpuAllowed() { if (forceDisallowCPUCopy) { return false; } @@ -513,10 +513,26 @@ bool Buffer::isReadWriteOnCpuAllowed(void *ptr, size_t size) { if (graphicsAllocation->peekSharedHandle() != 0) { return false; } + return true; +} - return (isMemObjZeroCopy() || (reinterpret_cast(ptr) & (MemoryConstants::cacheLineSize - 1)) != 0) && - (!context->getDevice(0)->getDeviceInfo().platformLP || (size <= maxBufferSizeForReadWriteOnCpu)) && - MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool()); +bool Buffer::isReadWriteOnCpuPreffered(void *ptr, size_t size) { + //if buffer is not zero copy and pointer is aligned it will be more beneficial to do the transfer on GPU + if (!isMemObjZeroCopy() && (reinterpret_cast(ptr) & (MemoryConstants::cacheLineSize - 1)) 
== 0) { + return false; + } + + //on low power devices larger transfers are better on the GPU + if (context->getDevice(0)->getDeviceInfo().platformLP && size > maxBufferSizeForReadWriteOnCpu) { + return false; + } + + //if we are not in System Memory Pool, it is more beneficial to do the transfer on GPU + if (!MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool())) { + return false; + } + + return true; } Buffer *Buffer::createBufferHw(Context *context, diff --git a/runtime/mem_obj/buffer.h b/runtime/mem_obj/buffer.h index a150b3160d..e111df5b23 100644 --- a/runtime/mem_obj/buffer.h +++ b/runtime/mem_obj/buffer.h @@ -135,7 +135,8 @@ class Buffer : public MemObj { void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override; - bool isReadWriteOnCpuAllowed(void *ptr, size_t size); + bool isReadWriteOnCpuAllowed(); + bool isReadWriteOnCpuPreffered(void *ptr, size_t size); uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const; diff --git a/unit_tests/command_queue/read_write_buffer_cpu_copy.cpp b/unit_tests/command_queue/read_write_buffer_cpu_copy.cpp index e3b9197a6f..10e9d80217 100644 --- a/unit_tests/command_queue/read_write_buffer_cpu_copy.cpp +++ b/unit_tests/command_queue/read_write_buffer_cpu_copy.cpp @@ -24,10 +24,12 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, givenRenderCompressedGmmWhenAskingForCpuOpe auto alignedPtr = alignedMalloc(2, MemoryConstants::cacheLineSize); auto unalignedPtr = ptrOffset(alignedPtr, 1); - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedPtr, 1)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed()); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedPtr, 1)); gmm->isRenderCompressed = true; - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(unalignedPtr, 1)); + EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed()); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedPtr, 1)); 
alignedFree(alignedPtr); } @@ -50,7 +52,8 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedReadPtrWhenReadingBufferThenM bool aligned = (reinterpret_cast(unalignedReadPtr) & (MemoryConstants::cacheLineSize - 1)) == 0; EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy()); - ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedReadPtr, size)); + ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed()); + ASSERT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedReadPtr, size)); retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ, buffer.get(), @@ -90,7 +93,8 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedSrcPtrWhenWritingBufferThenMe bool aligned = (reinterpret_cast(unalignedWritePtr) & (MemoryConstants::cacheLineSize - 1)) == 0; EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy()); - ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedWritePtr, size)); + ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed()); + ASSERT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedWritePtr, size)); retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ, buffer.get(), @@ -135,31 +139,31 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri EXPECT_TRUE(buffer->isMemObjZeroCopy()); // zeroCopy == true && aligned/unaligned hostPtr - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(alignedHostPtr, MemoryConstants::cacheLineSize + 1)); - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedHostPtr, MemoryConstants::cacheLineSize)); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(alignedHostPtr, MemoryConstants::cacheLineSize + 1)); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedHostPtr, MemoryConstants::cacheLineSize)); buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal)); EXPECT_EQ(retVal, CL_SUCCESS); // zeroCopy == false && unaligned hostPtr - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedHostPtr, MemoryConstants::cacheLineSize)); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(unalignedHostPtr, 
MemoryConstants::cacheLineSize)); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_USE_HOST_PTR, 1 * MB, smallBufferPtr, retVal)); // platform LP == true && size <= 10 MB mockDevice->deviceInfo.platformLP = true; - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(smallBufferPtr, 1 * MB)); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(smallBufferPtr, 1 * MB)); // platform LP == false && size <= 10 MB mockDevice->deviceInfo.platformLP = false; - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(smallBufferPtr, 1 * MB)); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(smallBufferPtr, 1 * MB)); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal)); // platform LP == false && size > 10 MB mockDevice->deviceInfo.platformLP = false; - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(buffer->getCpuAddress(), largeBufferSize)); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(buffer->getCpuAddress(), largeBufferSize)); alignedFree(smallBufferPtr); alignedFree(alignedHostPtr); @@ -194,13 +198,13 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri EXPECT_EQ(retVal, CL_SUCCESS); // zeroCopy == false && aligned hostPtr - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(alignedHostPtr, MemoryConstants::cacheLineSize + 1)); + EXPECT_FALSE(buffer->isReadWriteOnCpuPreffered(alignedHostPtr, MemoryConstants::cacheLineSize + 1)); buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal)); // platform LP == true && size > 10 MB mockDevice->deviceInfo.platformLP = true; - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(buffer->getCpuAddress(), largeBufferSize)); + EXPECT_FALSE(buffer->isReadWriteOnCpuPreffered(buffer->getCpuAddress(), largeBufferSize)); alignedFree(alignedHostPtr); alignedFree(alignedBufferPtr); @@ -219,7 +223,10 @@ TEST(ReadWriteBufferOnCpu, givenNoHostPtrAndAlignedSizeWhenMemoryAllocationIsInN std::unique_ptr buffer(Buffer::create(&ctx, flags, 
MemoryConstants::pageSize, nullptr, retVal)); ASSERT_NE(nullptr, buffer.get()); - EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(reinterpret_cast(0x1000), MemoryConstants::pageSize)); + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed()); + EXPECT_TRUE(buffer->isReadWriteOnCpuPreffered(reinterpret_cast(0x1000), MemoryConstants::pageSize)); reinterpret_cast(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible); - EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(reinterpret_cast(0x1000), MemoryConstants::pageSize)); + //read write on CPU is allowed, but not preffered. We can access this memory via Lock. + EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed()); + EXPECT_FALSE(buffer->isReadWriteOnCpuPreffered(reinterpret_cast(0x1000), MemoryConstants::pageSize)); } diff --git a/unit_tests/sharings/gl/windows/gl_sharing_tests.cpp b/unit_tests/sharings/gl/windows/gl_sharing_tests.cpp index 07db0b6f5f..3b5dac9f04 100644 --- a/unit_tests/sharings/gl/windows/gl_sharing_tests.cpp +++ b/unit_tests/sharings/gl/windows/gl_sharing_tests.cpp @@ -212,7 +212,7 @@ TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredThenAcuqireCountIsIncremen auto memObject = castToObject(glBuffer); EXPECT_FALSE(memObject->isMemObjZeroCopy()); - EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed((void *)0x1001, 100)); + EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed()); auto currentGraphicsAllocation = memObject->getGraphicsAllocation(); memObject->peekSharingHandler()->acquire(memObject);