mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-20 13:11:34 +08:00
Initial support for enqueue blit operation
Change-Id: Iab37beeeb574243c00a5579568e15bcbd1307c43 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com> Related-To: NEO-3020
This commit is contained in:

committed by
sys_ocldev

parent
031d865b59
commit
576005f7bb
@ -583,4 +583,16 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT
|
|||||||
buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
|
buffer->getGraphicsAllocation()->getAllocationType() != GraphicsAllocation::AllocationType::BUFFER_COMPRESSED) ||
|
||||||
buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size);
|
buffer->isReadWriteOnCpuAllowed(blocking, numEventsInWaitList, ptr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Services a buffer read or write against host memory through the command stream
// receiver that the context designates for blit operations.
//
// commandType  CL_COMMAND_WRITE_BUFFER selects the FromHostPtr direction; every other
//              value is handled as a read (ToHostPtr).
// buffer       Buffer taking part in the transfer. Must not be null: it is dereferenced.
// offset, size, ptr
//              Forwarded unchanged to blitWithHostPtr.
// numEventsInWaitList, eventWaitList, event
//              Not consumed by this implementation: the dependency container handed to
//              the receiver is left empty and `event` is never written.
//
// Returns CL_SUCCESS after the request was handed to the receiver, or
// CL_OUT_OF_RESOURCES when the context does not provide a receiver for blit operations.
cl_int CommandQueue::enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer,
                                                            size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
                                                            const cl_event *eventWaitList, cl_event *event) {
    auto blitCommandStreamReceiver = context->getCommandStreamReceiverForBlitOperation(*buffer);
    if (blitCommandStreamReceiver == nullptr) {
        // Report the failure to the caller instead of dereferencing a null receiver.
        return CL_OUT_OF_RESOURCES;
    }

    const auto copyDirection = (CL_COMMAND_WRITE_BUFFER == commandType) ? BlitterConstants::BlitWithHostPtrDirection::FromHostPtr
                                                                        : BlitterConstants::BlitWithHostPtrDirection::ToHostPtr;

    // Intentionally empty: the event wait list is not translated into dependencies here.
    CsrDependencies csrDependencies;

    // NOTE(review): the literal `true` is presumably the "blocking" argument - confirm
    // against the blitWithHostPtr declaration.
    blitCommandStreamReceiver->blitWithHostPtr(*buffer, ptr, true, offset, size, copyDirection, csrDependencies);

    return CL_SUCCESS;
}
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
@ -439,6 +439,10 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
void *enqueueMapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||||
cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
|
cl_int enqueueUnmapMemObject(TransferProperties &transferProperties, EventsRequest &eventsRequest);
|
||||||
|
|
||||||
|
// Transfers data between `buffer` and the host memory at `ptr` through the command stream
// receiver the context returns for blit operations. CL_COMMAND_WRITE_BUFFER selects the
// FromHostPtr direction; any other commandType selects ToHostPtr. `offset`, `size` and
// `ptr` are forwarded to the receiver. The definition shown in this change returns
// CL_SUCCESS and does not read numEventsInWaitList / eventWaitList or write `event`.
cl_int enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer,
|
||||||
|
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
|
||||||
|
const cl_event *eventWaitList, cl_event *event);
|
||||||
|
|
||||||
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
|
virtual void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueue, unsigned int commandType){};
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation,
|
||||||
|
@ -40,6 +40,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
|||||||
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true;
|
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_READ_BUFFER) : true;
|
||||||
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_READ_BUFFER, blockingRead, size, ptr,
|
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_READ_BUFFER, blockingRead, size, ptr,
|
||||||
numEventsInWaitList, eventWaitList);
|
numEventsInWaitList, eventWaitList);
|
||||||
|
bool blitOperationsSupported = device->getExecutionEnvironment()->getHardwareInfo()->capabilityTable.blitterOperationsSupported &&
|
||||||
|
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get();
|
||||||
|
|
||||||
if (isCpuCopyAllowed) {
|
if (isCpuCopyAllowed) {
|
||||||
if (isMemTransferNeeded) {
|
if (isMemTransferNeeded) {
|
||||||
@ -52,6 +54,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
|
|||||||
} else if (!isMemTransferNeeded) {
|
} else if (!isMemTransferNeeded) {
|
||||||
return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER, blockingRead,
|
return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER, blockingRead,
|
||||||
numEventsInWaitList, eventWaitList, event);
|
numEventsInWaitList, eventWaitList, event);
|
||||||
|
} else if (blitOperationsSupported) {
|
||||||
|
return enqueueReadWriteBufferWithBlitTransfer(CL_COMMAND_READ_BUFFER, buffer, offset, size, ptr,
|
||||||
|
numEventsInWaitList, eventWaitList, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||||
|
@ -35,6 +35,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
|||||||
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER) : true;
|
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, CL_COMMAND_WRITE_BUFFER) : true;
|
||||||
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_WRITE_BUFFER, blockingWrite, size, const_cast<void *>(ptr),
|
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, CL_COMMAND_WRITE_BUFFER, blockingWrite, size, const_cast<void *>(ptr),
|
||||||
numEventsInWaitList, eventWaitList);
|
numEventsInWaitList, eventWaitList);
|
||||||
|
bool blitOperationsSupported = device->getExecutionEnvironment()->getHardwareInfo()->capabilityTable.blitterOperationsSupported &&
|
||||||
|
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.get();
|
||||||
|
|
||||||
if (isCpuCopyAllowed) {
|
if (isCpuCopyAllowed) {
|
||||||
if (isMemTransferNeeded) {
|
if (isMemTransferNeeded) {
|
||||||
@ -47,6 +49,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
|
|||||||
} else if (!isMemTransferNeeded) {
|
} else if (!isMemTransferNeeded) {
|
||||||
return enqueueMarkerForReadWriteOperation(buffer, const_cast<void *>(ptr), CL_COMMAND_WRITE_BUFFER, blockingWrite,
|
return enqueueMarkerForReadWriteOperation(buffer, const_cast<void *>(ptr), CL_COMMAND_WRITE_BUFFER, blockingWrite,
|
||||||
numEventsInWaitList, eventWaitList, event);
|
numEventsInWaitList, eventWaitList, event);
|
||||||
|
} else if (blitOperationsSupported) {
|
||||||
|
return enqueueReadWriteBufferWithBlitTransfer(CL_COMMAND_WRITE_BUFFER, buffer, offset, size, const_cast<void *>(ptr),
|
||||||
|
numEventsInWaitList, eventWaitList, event);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
|
||||||
|
@ -100,6 +100,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableExtendedVaFormats, false, "Enable more format
|
|||||||
DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension")
|
DECLARE_DEBUG_VARIABLE(bool, AddClGlSharing, false, "Add cl-gl extension")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnablePassInlineData, false, "Enable passing of inline data")
|
DECLARE_DEBUG_VARIABLE(bool, EnablePassInlineData, false, "Enable passing of inline data")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying")
|
DECLARE_DEBUG_VARIABLE(bool, EnableFormatQuery, false, "Enable sharing format querying")
|
||||||
|
DECLARE_DEBUG_VARIABLE(bool, EnableBlitterOperationsForReadWriteBuffers, false, "Use Blitter engine for Read/Write Buffers operations")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalker, 0, "-1: platform behavior, 0: disabled, 1: enabled. Adds dedicated cache flush command after WALKER command when surfaces used by kernel require to flush the cache")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableCacheFlushAfterWalker, 0, "-1: platform behavior, 0: disabled, 1: enabled. Adds dedicated cache flush command after WALKER command when surfaces used by kernel require to flush the cache")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableLocalMemory, -1, "-1: default behavior, 0: disabled, 1: enabled, Allows allocating graphics memory in Local Memory")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableLocalMemory, -1, "-1: default behavior, 0: disabled, 1: enabled, Allows allocating graphics memory in Local Memory")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableStatelessToStatefulBufferOffsetOpt, -1, "-1: dont override, 0: disable, 1: enable, Enables buffer-offset improvement of the stateless to stateful optimization")
|
||||||
|
@ -618,14 +618,10 @@ TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenRenderCompressedBufferWh
|
|||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenBufferWithInitializationDataAndBcsCsrWhenCreatingThenUseBlitOperation) {
|
struct BcsBufferTests : public ::testing::Test {
|
||||||
if (is32bit) {
|
class BcsMockContext : public MockContext {
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
class MyMockContext : public MockContext {
|
|
||||||
public:
|
public:
|
||||||
MyMockContext(Device *device) : MockContext(device) {
|
BcsMockContext(Device *device) : MockContext(device) {
|
||||||
bcsOsContext.reset(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false));
|
bcsOsContext.reset(OsContext::create(nullptr, 0, 0, aub_stream::ENGINE_BCS, PreemptionMode::Disabled, false));
|
||||||
bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment()));
|
bcsCsr.reset(createCommandStream(*device->getExecutionEnvironment()));
|
||||||
bcsCsr->setupContext(*bcsOsContext);
|
bcsCsr->setupContext(*bcsOsContext);
|
||||||
@ -638,18 +634,66 @@ HWTEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenBufferWithInitializati
|
|||||||
std::unique_ptr<CommandStreamReceiver> bcsCsr;
|
std::unique_ptr<CommandStreamReceiver> bcsCsr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void SetUp() override {
|
||||||
|
if (is32bit) {
|
||||||
|
GTEST_SKIP();
|
||||||
|
}
|
||||||
|
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||||
|
bcsMockContext = std::make_unique<BcsMockContext>(device.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
|
std::unique_ptr<MockDevice> device;
|
||||||
|
std::unique_ptr<BcsMockContext> bcsMockContext;
|
||||||
|
uint32_t hostPtr = 0;
|
||||||
|
cl_int retVal = CL_SUCCESS;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creating a buffer with CL_MEM_COPY_HOST_PTR on the BCS-enabled mock context must issue
// exactly one blit on the context's BCS command stream receiver.
HWTEST_F(BcsBufferTests, givenBufferWithInitializationDataAndBcsCsrWhenCreatingThenUseBlitOperation) {
    auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsMockContext->bcsCsr.get());

    // Ownership of the new memory manager passes to the execution environment right away;
    // the context only receives a non-owning pointer.
    auto newMemoryManager = new MockMemoryManager(true, true, *device->getExecutionEnvironment());
    device->getExecutionEnvironment()->memoryManager.reset(newMemoryManager);
    bcsMockContext->setMemoryManager(newMemoryManager);

    // CL_MEM_COPY_HOST_PTR requires the host pointer to reference at least `size` bytes.
    // The fixture's 4-byte `hostPtr` is too small for a 2000-byte buffer, so use a
    // dedicated, correctly sized source to avoid an out-of-bounds read.
    constexpr size_t bufferSize = 2000;
    uint8_t initializationData[bufferSize] = {};

    EXPECT_EQ(0u, bcsCsr->blitBufferCalled);
    auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_COPY_HOST_PTR, bufferSize, initializationData, retVal));
    EXPECT_EQ(1u, bcsCsr->blitBufferCalled);
}
|
||||||
|
|
||||||
|
// enqueueWriteBuffer / enqueueReadBuffer must reach the BCS command stream receiver only
// when both the EnableBlitterOperationsForReadWriteBuffers debug flag and the hardware
// capability blitterOperationsSupported are enabled; every other combination must leave
// the blit counter untouched.
HWTEST_F(BcsBufferTests, givenBcsSupportedWhenEnqueueReadWriteBufferIsCalledThenUseBcsCsr) {
    DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(false);
    auto blitCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsMockContext->bcsCsr.get());

    auto bufferForBlt = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // Keep the CPU copy path out of the way so the blit decision is actually exercised.
    bufferForBlt->forceDisallowCPUCopy = true;
    std::unique_ptr<CommandQueue> commandQueue(CommandQueue::create(bcsMockContext.get(), device.get(), nullptr, retVal));
    auto *hwInfo = device->getExecutionEnvironment()->getMutableHardwareInfo();

    // Applies one combination of debug flag and hardware capability.
    auto configureBlitter = [&](bool debugFlagEnabled, bool hwSupportsBlitter) {
        DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(debugFlagEnabled);
        hwInfo->capabilityTable.blitterOperationsSupported = hwSupportsBlitter;
    };
    auto blockingWrite = [&]() {
        commandQueue->enqueueWriteBuffer(bufferForBlt.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
    };
    auto blockingRead = [&]() {
        commandQueue->enqueueReadBuffer(bufferForBlt.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
    };

    struct BlitterConfig {
        bool debugFlagEnabled;
        bool hwSupportsBlitter;
    };
    // Every combination in which at least one of the two switches is off.
    const BlitterConfig configsWithoutBlit[] = {{false, false}, {true, false}, {false, true}};
    for (const auto &config : configsWithoutBlit) {
        configureBlitter(config.debugFlagEnabled, config.hwSupportsBlitter);
        blockingWrite();
        blockingRead();
    }

    EXPECT_EQ(0u, blitCsr->blitBufferCalled);

    // Both switches on: each enqueue must add exactly one blit.
    configureBlitter(true, true);
    blockingWrite();
    EXPECT_EQ(1u, blitCsr->blitBufferCalled);
    blockingRead();
    EXPECT_EQ(2u, blitCsr->blitBufferCalled);
}
|
||||||
|
|
||||||
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenNonRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) {
|
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenNonRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) {
|
||||||
hwInfo->capabilityTable.ftrRenderCompressedBuffers = false;
|
hwInfo->capabilityTable.ftrRenderCompressedBuffers = false;
|
||||||
|
|
||||||
|
@ -110,3 +110,4 @@ EnableHostPtrTracking = 1
|
|||||||
DisableDcFlushInEpilogue = 0
|
DisableDcFlushInEpilogue = 0
|
||||||
OverrideInvalidEngineWithDefault = 0
|
OverrideInvalidEngineWithDefault = 0
|
||||||
EnableFormatQuery = 0
|
EnableFormatQuery = 0
|
||||||
|
EnableBlitterOperationsForReadWriteBuffers = 0
|
||||||
|
Reference in New Issue
Block a user