[2/n] Refactor CPU copies

- move the blocking-flag check from the buffer into the command queue function
- refactor the buffer function for better readability
- add a new method (Buffer::isCompressed) to easily check whether a buffer is compressed.

Change-Id: I62465b7bc8b109f14d0012b5641d4b822e3cc908
Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek 2020-02-21 09:48:51 +01:00 committed by sys_ocldev
parent 8cb486e821
commit 618472c23e
6 changed files with 61 additions and 23 deletions

View File

@ -562,12 +562,23 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT
return false;
}
if (debugVariableSet && buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
//if buffer is compressed we cannot do CPU copy
if (buffer->isCompressed()) {
return false;
}
if (debugVariableSet) {
return true;
}
//non blocking transfers are not expected to be serviced by CPU
//we do not want to artificially stall the pipeline to allow CPU access
if (blocking == CL_FALSE) {
return false;
}
//check if buffer is compatible
if (!buffer->isReadWriteOnCpuAllowed(blocking, ptr, size)) {
if (!buffer->isReadWriteOnCpuAllowed(ptr, size)) {
return false;
}

View File

@ -501,11 +501,21 @@ size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region,
return hostPtrSize;
}
bool Buffer::isReadWriteOnCpuAllowed(cl_bool blocking, void *ptr, size_t size) {
return (blocking == CL_TRUE && !forceDisallowCPUCopy) && graphicsAllocation->peekSharedHandle() == 0 &&
(isMemObjZeroCopy() || (reinterpret_cast<uintptr_t>(ptr) & (MemoryConstants::cacheLineSize - 1)) != 0) &&
// Decides whether a read/write of this buffer may be serviced by the CPU.
//
// ptr  - host pointer taking part in the transfer; only its cache-line
//        alignment is inspected here.
// size - transfer size in bytes; only compared against
//        maxBufferSizeForReadWriteOnCpu when the device reports platformLP.
//
// Returns true only when every check below passes. The checks run in a
// fixed order and the first failing one ends the evaluation.
bool Buffer::isReadWriteOnCpuAllowed(void *ptr, size_t size) {
    // Explicit opt-out flag, then compression, then shared handles.
    if (forceDisallowCPUCopy || this->isCompressed()) {
        return false;
    }
    if (graphicsAllocation->peekSharedHandle() != 0) {
        return false;
    }

    // Either the buffer is zero-copy, or the host pointer is NOT cache-line aligned.
    const bool hostPtrMisaligned = (reinterpret_cast<uintptr_t>(ptr) & (MemoryConstants::cacheLineSize - 1)) != 0;
    if (!isMemObjZeroCopy() && !hostPtrMisaligned) {
        return false;
    }

    // On platformLP devices the transfer size is capped.
    if (context->getDevice(0)->getDeviceInfo().platformLP && size > maxBufferSizeForReadWriteOnCpu) {
        return false;
    }

    // NOTE(review): this appears to overlap with the isCompressed() guard above
    // whenever a default Gmm exists; kept to preserve behaviour exactly.
    auto defaultGmm = graphicsAllocation->getDefaultGmm();
    if (defaultGmm && defaultGmm->isRenderCompressed) {
        return false;
    }

    // Finally, the allocation has to live in a system memory pool.
    return MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool());
}
@ -582,6 +592,17 @@ uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) cons
}
}
// Reports whether this buffer is compressed.
//
// When the graphics allocation has a default Gmm, its isRenderCompressed
// flag is the answer. Only when there is no default Gmm does the
// allocation type decide (BUFFER_COMPRESSED means compressed).
bool Buffer::isCompressed() const {
    auto allocation = this->getGraphicsAllocation();
    if (auto defaultGmm = allocation->getDefaultGmm()) {
        return defaultGmm->isRenderCompressed;
    }
    return allocation->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED;
}
void Buffer::setSurfaceState(const ClDevice *device,
void *surfaceState,
size_t svmSize,

View File

@ -135,10 +135,12 @@ class Buffer : public MemObj {
void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override;
void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) override;
bool isReadWriteOnCpuAllowed(cl_bool blocking, void *ptr, size_t size);
bool isReadWriteOnCpuAllowed(void *ptr, size_t size);
uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const;
bool isCompressed() const;
protected:
Buffer(Context *context,
MemoryPropertiesFlags memoryProperties,

View File

@ -9,6 +9,7 @@
#include "core/helpers/basic_math.h"
#include "test.h"
#include "unit_tests/command_queue/enqueue_read_buffer_fixture.h"
#include "unit_tests/mocks/mock_command_queue.h"
using namespace NEO;
@ -23,10 +24,10 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, givenRenderCompressedGmmWhenAskingForCpuOpe
auto alignedPtr = alignedMalloc(2, MemoryConstants::cacheLineSize);
auto unalignedPtr = ptrOffset(alignedPtr, 1);
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedPtr, 1));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedPtr, 1));
gmm->isRenderCompressed = true;
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedPtr, 1));
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(unalignedPtr, 1));
alignedFree(alignedPtr);
}
@ -49,7 +50,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedReadPtrWhenReadingBufferThenM
bool aligned = (reinterpret_cast<uintptr_t>(unalignedReadPtr) & (MemoryConstants::cacheLineSize - 1)) == 0;
EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy());
ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedReadPtr, size));
ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedReadPtr, size));
retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ,
buffer.get(),
@ -89,7 +90,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedSrcPtrWhenWritingBufferThenMe
bool aligned = (reinterpret_cast<uintptr_t>(unalignedWritePtr) & (MemoryConstants::cacheLineSize - 1)) == 0;
EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy());
ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedWritePtr, size));
ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedWritePtr, size));
retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ,
buffer.get(),
@ -134,31 +135,31 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri
EXPECT_TRUE(buffer->isMemObjZeroCopy());
// zeroCopy == true && aligned/unaligned hostPtr
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, alignedHostPtr, MemoryConstants::cacheLineSize + 1));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedHostPtr, MemoryConstants::cacheLineSize));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(alignedHostPtr, MemoryConstants::cacheLineSize + 1));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedHostPtr, MemoryConstants::cacheLineSize));
buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
// zeroCopy == false && unaligned hostPtr
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedHostPtr, MemoryConstants::cacheLineSize));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedHostPtr, MemoryConstants::cacheLineSize));
buffer.reset(Buffer::create(mockContext.get(), CL_MEM_USE_HOST_PTR, 1 * MB, smallBufferPtr, retVal));
// platform LP == true && size <= 10 MB
mockDevice->deviceInfo.platformLP = true;
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, smallBufferPtr, 1 * MB));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(smallBufferPtr, 1 * MB));
// platform LP == false && size <= 10 MB
mockDevice->deviceInfo.platformLP = false;
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, smallBufferPtr, 1 * MB));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(smallBufferPtr, 1 * MB));
buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal));
// platform LP == false && size > 10 MB
mockDevice->deviceInfo.platformLP = false;
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, buffer->getCpuAddress(), largeBufferSize));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(buffer->getCpuAddress(), largeBufferSize));
alignedFree(smallBufferPtr);
alignedFree(alignedHostPtr);
@ -176,6 +177,8 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
auto mockContext = std::unique_ptr<MockContext>(new MockContext(mockDevice.get()));
auto mockCommandQueue = std::unique_ptr<MockCommandQueue>(new MockCommandQueue);
auto memoryManager = static_cast<OsAgnosticMemoryManager *>(mockDevice->getMemoryManager());
memoryManager->turnOnFakingBigAllocations();
@ -184,20 +187,20 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri
EXPECT_TRUE(buffer->isMemObjZeroCopy());
// non blocking
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_FALSE, unalignedHostPtr, MemoryConstants::cacheLineSize));
EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_NDRANGE_KERNEL, false, size, unalignedHostPtr, 0u, nullptr));
buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal));
EXPECT_EQ(retVal, CL_SUCCESS);
// zeroCopy == false && aligned hostPtr
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, alignedHostPtr, MemoryConstants::cacheLineSize + 1));
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(alignedHostPtr, MemoryConstants::cacheLineSize + 1));
buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal));
// platform LP == true && size > 10 MB
mockDevice->deviceInfo.platformLP = true;
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, buffer->getCpuAddress(), largeBufferSize));
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(buffer->getCpuAddress(), largeBufferSize));
alignedFree(alignedHostPtr);
alignedFree(alignedBufferPtr);
@ -216,7 +219,7 @@ TEST(ReadWriteBufferOnCpu, givenNoHostPtrAndAlignedSizeWhenMemoryAllocationIsInN
std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal));
ASSERT_NE(nullptr, buffer.get());
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, reinterpret_cast<void *>(0x1000), MemoryConstants::pageSize));
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(reinterpret_cast<void *>(0x1000), MemoryConstants::pageSize));
reinterpret_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible);
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, reinterpret_cast<void *>(0x1000), MemoryConstants::pageSize));
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(reinterpret_cast<void *>(0x1000), MemoryConstants::pageSize));
}

View File

@ -17,6 +17,7 @@
namespace NEO {
class MockCommandQueue : public CommandQueue {
public:
using CommandQueue::bufferCpuCopyAllowed;
using CommandQueue::device;
using CommandQueue::gpgpuEngine;
using CommandQueue::obtainNewTimestampPacketNodes;

View File

@ -212,7 +212,7 @@ TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredThenAcuqireCountIsIncremen
auto memObject = castToObject<Buffer>(glBuffer);
EXPECT_FALSE(memObject->isMemObjZeroCopy());
EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed(true, (void *)0x1001, 100));
EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed((void *)0x1001, 100));
auto currentGraphicsAllocation = memObject->getGraphicsAllocation();
memObject->peekSharingHandler()->acquire(memObject);