Refactor dispatching blit enqueue

Change-Id: Ibe499e4815a16d5884510c6804221d2b74dbffd4
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-3020
This commit is contained in:
Dunajski, Bartosz
2019-08-30 09:37:44 +02:00
committed by sys_ocldev
parent 094068807e
commit 77e22bd81b
6 changed files with 130 additions and 30 deletions

View File

@ -347,7 +347,7 @@ class CommandQueueHw : public CommandQueue {
LinearStream &commandStream,
size_t commandStreamStart,
bool &blocking,
bool blitEnqueue,
const EnqueueProperties &enqueueProperties,
TimestampPacketContainer *previousTimestampPacketNodes,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
@ -356,7 +356,7 @@ class CommandQueueHw : public CommandQueue {
size_t numSurfaces,
LinearStream *commandStream,
CsrDependencies &csrDeps);
void processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
BlitProperties processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
TimestampPacketContainer &previousTimestampPacketNodes,
const EventsRequest &eventsRequest,
LinearStream &commandStream,

View File

@ -190,6 +190,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
TimestampPacketContainer previousTimestampPacketNodes;
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
CsrDependencies csrDeps;
BlitProperties blitProperties;
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
csrDeps.fillFromEventsRequestAndMakeResident(eventsRequest, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
@ -218,7 +219,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
bool flushDependenciesForNonKernelCommand = false;
if (blitEnqueue) {
processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType);
blitProperties = processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType);
} else if (multiDispatchInfo.empty() == false) {
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
@ -245,14 +246,15 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
CompletionStamp completionStamp = {Event::eventNotReady, taskLevel, 0};
EnqueueProperties enqueueProperties(blitEnqueue, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType),
flushDependenciesForNonKernelCommand, &blitProperties);
if (!blockQueue) {
if (parentKernel) {
processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking);
}
auto kernelSubmissionRequired = !isCommandWithoutKernel(commandType) && !blitEnqueue;
if (kernelSubmissionRequired) {
if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) {
completionStamp = enqueueNonBlocked<commandType>(
surfacesForResidency,
numSurfaceForResidency,
@ -284,19 +286,20 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
devQueueHw->getDebugQueue());
}
}
} else if (isCacheFlushCommand(commandType) || blitEnqueue || flushDependenciesForNonKernelCommand) {
} else if (enqueueProperties.isFlushWithoutKernelRequired()) {
completionStamp = enqueueCommandWithoutKernel(
surfacesForResidency,
numSurfaceForResidency,
commandStream,
commandStreamStart,
blocking,
blitEnqueue,
enqueueProperties,
&previousTimestampPacketNodes,
eventsRequest,
eventBuilder,
taskLevel);
} else {
UNRECOVERABLE_IF(enqueueProperties.operation != EnqueueProperties::Operation::EnqueueWithoutSubmission);
auto maxTaskCount = this->taskCount;
for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) {
auto event = castToObject<Event>(eventWaitList[eventId]);
@ -432,7 +435,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
TimestampPacketContainer &previousTimestampPacketNodes,
const EventsRequest &eventsRequest,
LinearStream &commandStream,
@ -450,12 +453,10 @@ void CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const MultiDispatc
blitProperties.csrDependencies.push_back(&previousTimestampPacketNodes);
blitProperties.outputTimestampPacket = timestampPacketContainer.get();
previousTimestampPacketNodes.makeResident(*blitCommandStreamReceiver);
timestampPacketContainer->makeResident(*blitCommandStreamReceiver);
blitCommandStreamReceiver->blitBuffer(blitProperties);
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(commandStream, *currentTimestampPacketNode);
return blitProperties;
}
template <typename GfxFamily>
@ -809,7 +810,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
LinearStream &commandStream,
size_t commandStreamStart,
bool &blocking,
bool blitEnqueue,
const EnqueueProperties &enqueueProperties,
TimestampPacketContainer *previousTimestampPacketNodes,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
@ -826,11 +827,20 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
requiresCoherency |= surface->IsCoherent;
}
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
UNRECOVERABLE_IF(!enqueueProperties.blitProperties);
auto bcsCsr = getBcsCommandStreamReceiver();
previousTimestampPacketNodes->makeResident(*bcsCsr);
timestampPacketContainer->makeResident(*bcsCsr);
bcsCsr->blitBuffer(*enqueueProperties.blitProperties);
}
DispatchFlags dispatchFlags = {};
dispatchFlags.blocking = blocking;
dispatchFlags.multiEngineQueue = multiEngineQueue;
dispatchFlags.preemptionMode = device->getPreemptionMode();
dispatchFlags.implicitFlush = blitEnqueue;
dispatchFlags.implicitFlush = (enqueueProperties.operation == EnqueueProperties::Operation::Blit);
dispatchFlags.guardCommandBufferWithPipeControl = true;
dispatchFlags.outOfOrderExecutionAllowed = getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled();

View File

@ -21,8 +21,6 @@ class LinearStream;
class TimestampPacketContainer;
struct BlitProperties {
BlitProperties() = delete;
static BlitProperties constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
CommandStreamReceiver &commandStreamReceiver,
GraphicsAllocation *memObjAllocation, void *hostPtr, bool blocking,

View File

@ -15,6 +15,7 @@
namespace NEO {
class MemObj;
class Buffer;
struct BlitProperties;
enum QueueThrottle : uint32_t {
LOW,
@ -77,4 +78,49 @@ struct MapInfo {
uint32_t mipLevel = 0;
bool readOnly = false;
};
// Classification of a single enqueue call. Built once from a set of flags and then
// queried to decide how the enqueue has to be submitted.
struct EnqueueProperties {
    // Kinds of work an enqueue call can resolve to.
    enum class Operation {
        Blit,
        ExplicitCacheFlush,
        EnqueueWithoutSubmission,
        DependencyResolveOnGpu,
        GpuKernel,
    };

    EnqueueProperties() = delete;

    // Selects the operation from the given flags. The flags are evaluated in a fixed
    // priority order - blitEnqueue, hasKernels, isCacheFlushCmd, flushDependenciesOnly -
    // and the first one that is set wins. When none is set, the operation is
    // EnqueueWithoutSubmission.
    // The blitProperties pointer is stored only when the operation is Blit; in every
    // other case the member keeps its nullptr default.
    EnqueueProperties(bool blitEnqueue, bool hasKernels, bool isCacheFlushCmd, bool flushDependenciesOnly,
                      const BlitProperties *blitProperties) {
        if (blitEnqueue) {
            this->blitProperties = blitProperties;
            operation = Operation::Blit;
        } else if (hasKernels) {
            operation = Operation::GpuKernel;
        } else if (isCacheFlushCmd) {
            operation = Operation::ExplicitCacheFlush;
        } else if (flushDependenciesOnly) {
            operation = Operation::DependencyResolveOnGpu;
        } else {
            operation = Operation::EnqueueWithoutSubmission;
        }
    }

    // Returns true for Blit, ExplicitCacheFlush and DependencyResolveOnGpu;
    // false for GpuKernel and EnqueueWithoutSubmission.
    bool isFlushWithoutKernelRequired() const {
        switch (operation) {
        case Operation::Blit:
        case Operation::ExplicitCacheFlush:
        case Operation::DependencyResolveOnGpu:
            return true;
        default:
            return false;
        }
    }

    // Set only for Operation::Blit; the struct just stores the pointer it was given.
    const BlitProperties *blitProperties = nullptr;
    Operation operation = Operation::EnqueueWithoutSubmission;
};
} // namespace NEO

View File

@ -5,6 +5,7 @@
*
*/
#include "core/unit_tests/helpers/debug_manager_state_restore.h"
#include "runtime/event/event_builder.h"
#include "runtime/event/user_event.h"
#include "runtime/helpers/timestamp_packet.h"
@ -43,7 +44,9 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedT
Surface *surfaces[] = {surface.get()};
auto blocking = true;
TimestampPacketContainer previousTimestampPacketNodes;
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, false, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId()), 1u);
}
@ -57,6 +60,7 @@ struct DispatchFlagsTests : public ::testing::Test {
std::unique_ptr<MockDevice> device;
std::unique_ptr<MockContext> context;
DebugManagerStateRestore restore;
};
HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispatchFlags) {
@ -70,7 +74,9 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa
TimestampPacketContainer previousTimestampPacketNodes;
EventsRequest eventsRequest(0, nullptr, nullptr);
EventBuilder eventBuilder;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, false, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking);
EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush);
@ -81,16 +87,32 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa
HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) {
using CsrType = MockCsrHw2<FamilyType>;
DebugManager.flags.EnableTimestampPacket.set(1);
SetUpImpl<CsrType>();
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
mockCsr->skipBlitCalls = true;
mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine;
cl_int retVal = CL_SUCCESS;
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
auto blocking = true;
TimestampPacketContainer previousTimestampPacketNodes;
EventsRequest eventsRequest(0, nullptr, nullptr);
EventBuilder eventBuilder;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, true, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
BuiltinOpParams builtinOpParams;
builtinOpParams.srcMemObj = buffer.get();
builtinOpParams.dstPtr = reinterpret_cast<void *>(0x1234);
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.setBuiltinOpParams(builtinOpParams);
mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, true);
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest,
mockCmdQ->getCS(0), 0);
EnqueueProperties enqueueProperties(true, false, false, false, &blitProperties);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush);
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
@ -98,23 +120,40 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowOutOfOrderExecution) {
using CsrType = MockCsrHw2<FamilyType>;
DebugManager.flags.EnableTimestampPacket.set(1);
SetUpImpl<CsrType>();
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
mockCsr->skipBlitCalls = true;
mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine;
cl_int retVal = CL_SUCCESS;
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
TimestampPacketContainer previousTimestampPacketNodes;
EventsRequest eventsRequest(0, nullptr, nullptr);
EventBuilder eventBuilder;
bool blocked = false;
BuiltinOpParams builtinOpParams;
builtinOpParams.srcMemObj = buffer.get();
builtinOpParams.dstPtr = reinterpret_cast<void *>(0x1234);
MultiDispatchInfo multiDispatchInfo;
multiDispatchInfo.setBuiltinOpParams(builtinOpParams);
mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, true);
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest,
mockCmdQ->getCS(0), 0);
EnqueueProperties enqueueProperties(true, false, false, false, &blitProperties);
enqueueProperties.blitProperties = &blitProperties;
mockCsr->nTo1SubmissionModelEnabled = false;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, true, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_FALSE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
mockCsr->nTo1SubmissionModelEnabled = true;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, true, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_TRUE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
}

View File

@ -213,6 +213,13 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
return completionStamp;
}
// Mock override: does nothing while skipBlitCalls is set, otherwise forwards the
// request unchanged to the CommandStreamReceiverHw<GfxFamily> implementation.
void blitBuffer(const BlitProperties &blitProperites) override {
    if (skipBlitCalls) {
        return;
    }
    CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitProperites);
}
bool skipBlitCalls = false;
bool storeFlushedTaskStream = false;
std::unique_ptr<uint8_t> storedTaskStream;
size_t storedTaskStreamSize = 0;