Initial support for enqueue blit operation

Change-Id: Iab37beeeb574243c00a5579568e15bcbd1307c43
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-3020
This commit is contained in:
Dunajski, Bartosz
2019-06-05 09:35:58 +02:00
committed by sys_ocldev
parent 031d865b59
commit 576005f7bb
7 changed files with 84 additions and 12 deletions

View File

@ -583,4 +583,16 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT
buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) || buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size); buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size);
} }
// Transfers data between a buffer and host memory using the blit command
// stream receiver that the context provides for the given buffer.
//
// commandType          CL_COMMAND_WRITE_BUFFER selects a host-to-buffer copy;
//                      any other value selects a buffer-to-host copy.
// buffer               buffer taking part in the transfer (dereferenced unconditionally).
// offset, size, ptr    forwarded unchanged to blitWithHostPtr.
// numEventsInWaitList, eventWaitList, event
//                      accepted but not read by this function.
//
// Always returns CL_SUCCESS.
//
// NOTE(review): the wait list is not translated into csrDependencies and the
// output event is never populated - presumably follow-up work; confirm.
// NOTE(review): the receiver pointer is used without a null check - verify that
// getCommandStreamReceiverForBlitOperation cannot return nullptr on this path.
cl_int CommandQueue::enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer,
                                                            size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
                                                            const cl_event *eventWaitList, cl_event *event) {
    const bool isWriteToBuffer = (commandType == CL_COMMAND_WRITE_BUFFER);
    const auto transferDirection = isWriteToBuffer ? BlitterConstants::BlitWithHostPtrDirection::FromHostPtr
                                                   : BlitterConstants::BlitWithHostPtrDirection::ToHostPtr;

    // Passed empty: no dependencies are registered before the blit is issued.
    CsrDependencies dependencies;

    auto bcsCsr = context->getCommandStreamReceiverForBlitOperation(*buffer);

    // The third argument is hard-coded to true (presumably "blocking" - TODO confirm
    // against the blitWithHostPtr declaration).
    bcsCsr->blitWithHostPtr(*buffer, ptr, true, offset, size, transferDirection, dependencies);

    return CL_SUCCESS;
}
} // namespace NEO } // namespace NEO

View File

@ -439,6 +439,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet); void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest); cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
// Reads from or writes to `buffer` using host memory at `ptr` via the blit
// command stream receiver obtained from the context.
// `commandType` equal to CL_COMMAND_WRITE_BUFFER copies from the host pointer
// into the buffer; any other value copies from the buffer to the host pointer.
// `offset` and `size` are forwarded to the blit call unchanged.
// NOTE(review): in the current definition `numEventsInWaitList`, `eventWaitList`
// and `event` are accepted but not used, and the call always returns CL_SUCCESS.
cl_int enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer,
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event);
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){}; virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,

View File

@ -40,6 +40,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true; bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true;
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_READ_BUFFER, blockingRead, size, ptr, bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_READ_BUFFER, blockingRead, size, ptr,
numEventsInWaitList, eventWaitList); numEventsInWaitList, eventWaitList);
bool blitOperationsSupported = device->getExecutionEnvironment()->getHardwareInfo()->capabilityTable.blitterOperationsSupported &&
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get();
if (isCpuCopyAllowed) { if (isCpuCopyAllowed) {
if (isMemTransferNeeded) { if (isMemTransferNeeded) {
@ -52,6 +54,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
} else if (!isMemTransferNeeded) { } else if (!isMemTransferNeeded) {
return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER, blockingRead, return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER, blockingRead,
numEventsInWaitList, eventWaitList, event); numEventsInWaitList, eventWaitList, event);
} else if (blitOperationsSupported) {
return enqueueReadWriteBufferWithBlitTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr,
numEventsInWaitList, eventWaitList, event);
} }
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,

View File

@ -35,6 +35,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER) : true; auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER) : true;
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_WRITE_BUFFER, blockingWrite, size, const_cast<void *>(ptr), bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_WRITE_BUFFER, blockingWrite, size, const_cast<void *>(ptr),
numEventsInWaitList, eventWaitList); numEventsInWaitList, eventWaitList);
bool blitOperationsSupported = device->getExecutionEnvironment()->getHardwareInfo()->capabilityTable.blitterOperationsSupported &&
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get();
if (isCpuCopyAllowed) { if (isCpuCopyAllowed) {
if (isMemTransferNeeded) { if (isMemTransferNeeded) {
@ -47,6 +49,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
} else if (!isMemTransferNeeded) { } else if (!isMemTransferNeeded) {
return enqueueMarkerForReadWriteOperation(buffer, const_cast<void *>(ptr), CL_COMMAND_WRITE_BUFFER, blockingWrite, return enqueueMarkerForReadWriteOperation(buffer, const_cast<void *>(ptr), CL_COMMAND_WRITE_BUFFER, blockingWrite,
numEventsInWaitList, eventWaitList, event); numEventsInWaitList, eventWaitList, event);
} else if (blitOperationsSupported) {
return enqueueReadWriteBufferWithBlitTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast<void *>(ptr),
numEventsInWaitList, eventWaitList, event);
} }
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,

View File

@ -100,6 +100,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableExtendedVaFormats, false, "Enable more format
DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension") DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension")
DECLARE_DEBUG_VARIABLE(bool, EnablePassInlineData, false, "Enable passing of inline data") DECLARE_DEBUG_VARIABLE(bool, EnablePassInlineData, false, "Enable passing of inline data")
DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying") DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying")
DECLARE_DEBUG_VARIABLE(bool, EnableBlitterOperationsForReadWriteBuffers, false, "Use Blitter engine for Read/Write Buffers operations")
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalker, 0, "-1: platform behavior, 0: disabled, 1: enabled. Adds dedicated cache flush command after WALKER command when surfaces used by kernel require to flush the cache") DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalker, 0, "-1: platform behavior, 0: disabled, 1: enabled. Adds dedicated cache flush command after WALKER command when surfaces used by kernel require to flush the cache")
DECLARE_DEBUG_VARIABLE(int32_t, EnableLocalMemory, -1, "-1: default behavior, 0: disabled, 1: enabled, Allows allocating graphics memory in Local Memory") DECLARE_DEBUG_VARIABLE(int32_t, EnableLocalMemory, -1, "-1: default behavior, 0: disabled, 1: enabled, Allows allocating graphics memory in Local Memory")
DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization") DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization")

View File

@ -618,14 +618,10 @@ TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenRenderCompressedBufferWh
EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(CL_SUCCESS, retVal);
} }
HWTEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenBufferWithInitializationDataAndBcsCsrWhenCreatingThenUseBlitOperation) { struct BcsBufferTests : public ::testing::Test {
if (is32bit) { class BcsMockContext : public MockContext {
return;
}
class MyMockContext : public MockContext {
public: public:
MyMockContext(Device *device) : MockContext(device) { BcsMockContext(Device *device) : MockContext(device) {
bcsOsContext.reset(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false)); bcsOsContext.reset(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false));
bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment())); bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment()));
bcsCsr->setupContext(*bcsOsContext); bcsCsr->setupContext(*bcsOsContext);
@ -638,18 +634,66 @@ HWTEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenBufferWithInitializati
std::unique_ptr<CommandStreamReceiver> bcsCsr; std::unique_ptr<CommandStreamReceiver> bcsCsr;
}; };
void SetUp() override {
if (is32bit) {
GTEST_SKIP();
}
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
bcsMockContext = std::make_unique<BcsMockContext>(device.get());
}
DebugManagerStateRestore restore;
std::unique_ptr<MockDevice> device;
std::unique_ptr<BcsMockContext> bcsMockContext;
uint32_t hostPtr = 0;
cl_int retVal = CL_SUCCESS;
};
HWTEST_F(BcsBufferTests, givenBufferWithInitializationDataAndBcsCsrWhenCreatingThenUseBlitOperation) {
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsMockContext->bcsCsr.get());
auto newMemoryManager = new MockMemoryManager(true, true, *device->getExecutionEnvironment()); auto newMemoryManager = new MockMemoryManager(true, true, *device->getExecutionEnvironment());
device->getExecutionEnvironment()->memoryManager.reset(newMemoryManager); device->getExecutionEnvironment()->memoryManager.reset(newMemoryManager);
context->setMemoryManager(newMemoryManager); bcsMockContext->setMemoryManager(newMemoryManager);
auto myContext = clUniquePtr(new MyMockContext(device.get()));
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(myContext->bcsCsr.get());
EXPECT_EQ(0u, bcsCsr->blitBufferCalled); EXPECT_EQ(0u, bcsCsr->blitBufferCalled);
auto bufferForBlt = clUniquePtr(Buffer::create(myContext.get(), CL_MEM_COPY_HOST_PTR, sizeof(uint32_t), &hostPtr, retVal)); auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_COPY_HOST_PTR, 2000, &hostPtr, retVal));
EXPECT_EQ(1u, bcsCsr->blitBufferCalled); EXPECT_EQ(1u, bcsCsr->blitBufferCalled);
} }
// Walks through every combination of the EnableBlitterOperationsForReadWriteBuffers
// debug flag and the hardware blitterOperationsSupported capability, and checks that
// the BCS command stream receiver's blit counter only advances when both are enabled.
HWTEST_F(BcsBufferTests, givenBcsSupportedWhenEnqueueReadWriteBufferIsCalledThenUseBcsCsr) {
    DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(false);
    auto blitCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsMockContext->bcsCsr.get());

    auto testedBuffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // Keep the CPU-copy shortcut out of the way so the enqueue calls reach the GPU/blit decision.
    testedBuffer->forceDisallowCPUCopy = true;

    auto queue = std::unique_ptr<CommandQueue>(CommandQueue::create(bcsMockContext.get(), device.get(), nullptr, retVal));
    auto *mutableHwInfo = device->getExecutionEnvironment()->getMutableHardwareInfo();

    // Applies one (debug flag, hardware capability) combination.
    auto configure = [&](bool debugFlagEnabled, bool hwBlitterSupported) {
        DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(debugFlagEnabled);
        mutableHwInfo->capabilityTable.blitterOperationsSupported = hwBlitterSupported;
    };
    auto blockingWrite = [&]() {
        queue->enqueueWriteBuffer(testedBuffer.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
    };
    auto blockingRead = [&]() {
        queue->enqueueReadBuffer(testedBuffer.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
    };

    // Neither the flag nor the hardware capability is enabled.
    configure(false, false);
    blockingWrite();
    blockingRead();

    // Flag enabled, hardware capability missing.
    configure(true, false);
    blockingWrite();
    blockingRead();

    // Hardware capability present, flag disabled.
    configure(false, true);
    blockingWrite();
    blockingRead();

    // None of the partial configurations may have issued a blit.
    EXPECT_EQ(0u, blitCsr->blitBufferCalled);

    // Both enabled: every enqueue is expected to add exactly one blit.
    configure(true, true);
    blockingWrite();
    EXPECT_EQ(1u, blitCsr->blitBufferCalled);
    blockingRead();
    EXPECT_EQ(2u, blitCsr->blitBufferCalled);
}
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenNonRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) { TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenNonRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) {
hwInfo->capabilityTable.ftrRenderCompressedBuffers = false; hwInfo->capabilityTable.ftrRenderCompressedBuffers = false;

View File

@ -110,3 +110,4 @@ EnableHostPtrTracking = 1
DisableDcFlushInEpilogue = 0 DisableDcFlushInEpilogue = 0
OverrideInvalidEngineWithDefault = 0 OverrideInvalidEngineWithDefault = 0
EnableFormatQuery = 0 EnableFormatQuery = 0
EnableBlitterOperationsForReadWriteBuffers = 0