[2/n] Refactor CPU copies
- move blocking flag to the command queue function - refactor the buffer function for better readability - add a new method to easily check whether a buffer is compressed. Change-Id: I62465b7bc8b109f14d0012b5641d4b822e3cc908 Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
parent
8cb486e821
commit
618472c23e
|
@ -562,12 +562,23 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT
|
|||
return false;
|
||||
}
|
||||
|
||||
if (debugVariableSet && buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
|
||||
//if buffer is compressed we cannot do CPU copy
|
||||
if (buffer->isCompressed()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (debugVariableSet) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//non blocking transfers are not expected to be serviced by CPU
|
||||
//we do not want to artificially stall the pipeline to allow CPU access
|
||||
if (blocking == CL_FALSE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
//check if buffer is compatible
|
||||
if (!buffer->isReadWriteOnCpuAllowed(blocking, ptr, size)) {
|
||||
if (!buffer->isReadWriteOnCpuAllowed(ptr, size)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -501,11 +501,21 @@ size_t Buffer::calculateHostPtrSize(const size_t *origin, const size_t *region,
|
|||
return hostPtrSize;
|
||||
}
|
||||
|
||||
bool Buffer::isReadWriteOnCpuAllowed(cl_bool blocking, void *ptr, size_t size) {
|
||||
return (blocking == CL_TRUE && !forceDisallowCPUCopy) && graphicsAllocation->peekSharedHandle() == 0 &&
|
||||
(isMemObjZeroCopy() || (reinterpret_cast<uintptr_t>(ptr) & (MemoryConstants::cacheLineSize - 1)) != 0) &&
|
||||
bool Buffer::isReadWriteOnCpuAllowed(void *ptr, size_t size) {
|
||||
if (forceDisallowCPUCopy) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (this->isCompressed()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (graphicsAllocation->peekSharedHandle() != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return (isMemObjZeroCopy() || (reinterpret_cast<uintptr_t>(ptr) & (MemoryConstants::cacheLineSize - 1)) != 0) &&
|
||||
(!context->getDevice(0)->getDeviceInfo().platformLP || (size <= maxBufferSizeForReadWriteOnCpu)) &&
|
||||
!(graphicsAllocation->getDefaultGmm() && graphicsAllocation->getDefaultGmm()->isRenderCompressed) &&
|
||||
MemoryPool::isSystemMemoryPool(graphicsAllocation->getMemoryPool());
|
||||
}
|
||||
|
||||
|
@ -582,6 +592,17 @@ uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) cons
|
|||
}
|
||||
}
|
||||
|
||||
bool Buffer::isCompressed() const {
|
||||
if (this->getGraphicsAllocation()->getDefaultGmm()) {
|
||||
return this->getGraphicsAllocation()->getDefaultGmm()->isRenderCompressed;
|
||||
}
|
||||
if (this->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Buffer::setSurfaceState(const ClDevice *device,
|
||||
void *surfaceState,
|
||||
size_t svmSize,
|
||||
|
|
|
@ -135,10 +135,12 @@ class Buffer : public MemObj {
|
|||
void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override;
|
||||
void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) override;
|
||||
|
||||
bool isReadWriteOnCpuAllowed(cl_bool blocking, void *ptr, size_t size);
|
||||
bool isReadWriteOnCpuAllowed(void *ptr, size_t size);
|
||||
|
||||
uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const;
|
||||
|
||||
bool isCompressed() const;
|
||||
|
||||
protected:
|
||||
Buffer(Context *context,
|
||||
MemoryPropertiesFlags memoryProperties,
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "core/helpers/basic_math.h"
|
||||
#include "test.h"
|
||||
#include "unit_tests/command_queue/enqueue_read_buffer_fixture.h"
|
||||
#include "unit_tests/mocks/mock_command_queue.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
|
@ -23,10 +24,10 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, givenRenderCompressedGmmWhenAskingForCpuOpe
|
|||
|
||||
auto alignedPtr = alignedMalloc(2, MemoryConstants::cacheLineSize);
|
||||
auto unalignedPtr = ptrOffset(alignedPtr, 1);
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedPtr, 1));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedPtr, 1));
|
||||
|
||||
gmm->isRenderCompressed = true;
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedPtr, 1));
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(unalignedPtr, 1));
|
||||
|
||||
alignedFree(alignedPtr);
|
||||
}
|
||||
|
@ -49,7 +50,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedReadPtrWhenReadingBufferThenM
|
|||
|
||||
bool aligned = (reinterpret_cast<uintptr_t>(unalignedReadPtr) & (MemoryConstants::cacheLineSize - 1)) == 0;
|
||||
EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy());
|
||||
ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedReadPtr, size));
|
||||
ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedReadPtr, size));
|
||||
|
||||
retVal = EnqueueReadBufferHelper<>::enqueueReadBuffer(pCmdQ,
|
||||
buffer.get(),
|
||||
|
@ -89,7 +90,7 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenUnalignedSrcPtrWhenWritingBufferThenMe
|
|||
|
||||
bool aligned = (reinterpret_cast<uintptr_t>(unalignedWritePtr) & (MemoryConstants::cacheLineSize - 1)) == 0;
|
||||
EXPECT_TRUE(!aligned || buffer->isMemObjZeroCopy());
|
||||
ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedWritePtr, size));
|
||||
ASSERT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedWritePtr, size));
|
||||
|
||||
retVal = EnqueueWriteBufferHelper<>::enqueueWriteBuffer(pCmdQ,
|
||||
buffer.get(),
|
||||
|
@ -134,31 +135,31 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri
|
|||
EXPECT_TRUE(buffer->isMemObjZeroCopy());
|
||||
|
||||
// zeroCopy == true && aligned/unaligned hostPtr
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, alignedHostPtr, MemoryConstants::cacheLineSize + 1));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedHostPtr, MemoryConstants::cacheLineSize));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(alignedHostPtr, MemoryConstants::cacheLineSize + 1));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedHostPtr, MemoryConstants::cacheLineSize));
|
||||
|
||||
buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal));
|
||||
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
|
||||
// zeroCopy == false && unaligned hostPtr
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, unalignedHostPtr, MemoryConstants::cacheLineSize));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(unalignedHostPtr, MemoryConstants::cacheLineSize));
|
||||
|
||||
buffer.reset(Buffer::create(mockContext.get(), CL_MEM_USE_HOST_PTR, 1 * MB, smallBufferPtr, retVal));
|
||||
|
||||
// platform LP == true && size <= 10 MB
|
||||
mockDevice->deviceInfo.platformLP = true;
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, smallBufferPtr, 1 * MB));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(smallBufferPtr, 1 * MB));
|
||||
|
||||
// platform LP == false && size <= 10 MB
|
||||
mockDevice->deviceInfo.platformLP = false;
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, smallBufferPtr, 1 * MB));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(smallBufferPtr, 1 * MB));
|
||||
|
||||
buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal));
|
||||
|
||||
// platform LP == false && size > 10 MB
|
||||
mockDevice->deviceInfo.platformLP = false;
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, buffer->getCpuAddress(), largeBufferSize));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(buffer->getCpuAddress(), largeBufferSize));
|
||||
|
||||
alignedFree(smallBufferPtr);
|
||||
alignedFree(alignedHostPtr);
|
||||
|
@ -176,6 +177,8 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri
|
|||
|
||||
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
auto mockContext = std::unique_ptr<MockContext>(new MockContext(mockDevice.get()));
|
||||
auto mockCommandQueue = std::unique_ptr<MockCommandQueue>(new MockCommandQueue);
|
||||
|
||||
auto memoryManager = static_cast<OsAgnosticMemoryManager *>(mockDevice->getMemoryManager());
|
||||
memoryManager->turnOnFakingBigAllocations();
|
||||
|
||||
|
@ -184,20 +187,20 @@ HWTEST_F(ReadWriteBufferCpuCopyTest, GivenSpecificMemoryStructuresWhenReadingWri
|
|||
EXPECT_TRUE(buffer->isMemObjZeroCopy());
|
||||
|
||||
// non blocking
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_FALSE, unalignedHostPtr, MemoryConstants::cacheLineSize));
|
||||
EXPECT_FALSE(mockCommandQueue->bufferCpuCopyAllowed(buffer.get(), CL_COMMAND_NDRANGE_KERNEL, false, size, unalignedHostPtr, 0u, nullptr));
|
||||
|
||||
buffer.reset(Buffer::create(context, CL_MEM_USE_HOST_PTR, size, unalignedBufferPtr, retVal));
|
||||
|
||||
EXPECT_EQ(retVal, CL_SUCCESS);
|
||||
|
||||
// zeroCopy == false && aligned hostPtr
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, alignedHostPtr, MemoryConstants::cacheLineSize + 1));
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(alignedHostPtr, MemoryConstants::cacheLineSize + 1));
|
||||
|
||||
buffer.reset(Buffer::create(mockContext.get(), CL_MEM_ALLOC_HOST_PTR, largeBufferSize, nullptr, retVal));
|
||||
|
||||
// platform LP == true && size > 10 MB
|
||||
mockDevice->deviceInfo.platformLP = true;
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, buffer->getCpuAddress(), largeBufferSize));
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(buffer->getCpuAddress(), largeBufferSize));
|
||||
|
||||
alignedFree(alignedHostPtr);
|
||||
alignedFree(alignedBufferPtr);
|
||||
|
@ -216,7 +219,7 @@ TEST(ReadWriteBufferOnCpu, givenNoHostPtrAndAlignedSizeWhenMemoryAllocationIsInN
|
|||
std::unique_ptr<Buffer> buffer(Buffer::create(&ctx, flags, MemoryConstants::pageSize, nullptr, retVal));
|
||||
ASSERT_NE(nullptr, buffer.get());
|
||||
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, reinterpret_cast<void *>(0x1000), MemoryConstants::pageSize));
|
||||
EXPECT_TRUE(buffer->isReadWriteOnCpuAllowed(reinterpret_cast<void *>(0x1000), MemoryConstants::pageSize));
|
||||
reinterpret_cast<MemoryAllocation *>(buffer->getGraphicsAllocation())->overrideMemoryPool(MemoryPool::SystemCpuInaccessible);
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(CL_TRUE, reinterpret_cast<void *>(0x1000), MemoryConstants::pageSize));
|
||||
EXPECT_FALSE(buffer->isReadWriteOnCpuAllowed(reinterpret_cast<void *>(0x1000), MemoryConstants::pageSize));
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
namespace NEO {
|
||||
class MockCommandQueue : public CommandQueue {
|
||||
public:
|
||||
using CommandQueue::bufferCpuCopyAllowed;
|
||||
using CommandQueue::device;
|
||||
using CommandQueue::gpgpuEngine;
|
||||
using CommandQueue::obtainNewTimestampPacketNodes;
|
||||
|
|
|
@ -212,7 +212,7 @@ TEST_F(glSharingTests, givenClGLBufferWhenItIsAcquiredThenAcuqireCountIsIncremen
|
|||
auto memObject = castToObject<Buffer>(glBuffer);
|
||||
EXPECT_FALSE(memObject->isMemObjZeroCopy());
|
||||
|
||||
EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed(true, (void *)0x1001, 100));
|
||||
EXPECT_FALSE(memObject->isReadWriteOnCpuAllowed((void *)0x1001, 100));
|
||||
auto currentGraphicsAllocation = memObject->getGraphicsAllocation();
|
||||
|
||||
memObject->peekSharingHandler()->acquire(memObject);
|
||||
|
|
Loading…
Reference in New Issue