mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-20 13:11:34 +08:00
Refactor dispatching blit enqueue
Change-Id: Ibe499e4815a16d5884510c6804221d2b74dbffd4
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-3020
This commit is contained in:

committed by
sys_ocldev

parent
094068807e
commit
77e22bd81b
@ -347,7 +347,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
LinearStream &commandStream,
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
bool blitEnqueue,
|
||||
const EnqueueProperties &enqueueProperties,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
EventsRequest &eventsRequest,
|
||||
EventBuilder &eventBuilder,
|
||||
@ -356,7 +356,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
size_t numSurfaces,
|
||||
LinearStream *commandStream,
|
||||
CsrDependencies &csrDeps);
|
||||
void processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
|
||||
BlitProperties processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
|
||||
TimestampPacketContainer &previousTimestampPacketNodes,
|
||||
const EventsRequest &eventsRequest,
|
||||
LinearStream &commandStream,
|
||||
|
@ -190,6 +190,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
TimestampPacketContainer previousTimestampPacketNodes;
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
CsrDependencies csrDeps;
|
||||
BlitProperties blitProperties;
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
csrDeps.fillFromEventsRequestAndMakeResident(eventsRequest, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr);
|
||||
@ -218,7 +219,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
bool flushDependenciesForNonKernelCommand = false;
|
||||
|
||||
if (blitEnqueue) {
|
||||
processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType);
|
||||
blitProperties = processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest, commandStream, commandType);
|
||||
} else if (multiDispatchInfo.empty() == false) {
|
||||
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
|
||||
hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
|
||||
@ -245,14 +246,15 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
|
||||
CompletionStamp completionStamp = {Event::eventNotReady, taskLevel, 0};
|
||||
|
||||
EnqueueProperties enqueueProperties(blitEnqueue, !multiDispatchInfo.empty(), isCacheFlushCommand(commandType),
|
||||
flushDependenciesForNonKernelCommand, &blitProperties);
|
||||
if (!blockQueue) {
|
||||
if (parentKernel) {
|
||||
processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking);
|
||||
}
|
||||
|
||||
auto kernelSubmissionRequired = !isCommandWithoutKernel(commandType) && !blitEnqueue;
|
||||
|
||||
if (kernelSubmissionRequired) {
|
||||
if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) {
|
||||
completionStamp = enqueueNonBlocked<commandType>(
|
||||
surfacesForResidency,
|
||||
numSurfaceForResidency,
|
||||
@ -284,19 +286,20 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
devQueueHw->getDebugQueue());
|
||||
}
|
||||
}
|
||||
} else if (isCacheFlushCommand(commandType) || blitEnqueue || flushDependenciesForNonKernelCommand) {
|
||||
} else if (enqueueProperties.isFlushWithoutKernelRequired()) {
|
||||
completionStamp = enqueueCommandWithoutKernel(
|
||||
surfacesForResidency,
|
||||
numSurfaceForResidency,
|
||||
commandStream,
|
||||
commandStreamStart,
|
||||
blocking,
|
||||
blitEnqueue,
|
||||
enqueueProperties,
|
||||
&previousTimestampPacketNodes,
|
||||
eventsRequest,
|
||||
eventBuilder,
|
||||
taskLevel);
|
||||
} else {
|
||||
UNRECOVERABLE_IF(enqueueProperties.operation != EnqueueProperties::Operation::EnqueueWithoutSubmission);
|
||||
auto maxTaskCount = this->taskCount;
|
||||
for (auto eventId = 0u; eventId < numEventsInWaitList; eventId++) {
|
||||
auto event = castToObject<Event>(eventWaitList[eventId]);
|
||||
@ -432,7 +435,7 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
|
||||
BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const MultiDispatchInfo &multiDispatchInfo,
|
||||
TimestampPacketContainer &previousTimestampPacketNodes,
|
||||
const EventsRequest &eventsRequest,
|
||||
LinearStream &commandStream,
|
||||
@ -450,12 +453,10 @@ void CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const MultiDispatc
|
||||
blitProperties.csrDependencies.push_back(&previousTimestampPacketNodes);
|
||||
blitProperties.outputTimestampPacket = timestampPacketContainer.get();
|
||||
|
||||
previousTimestampPacketNodes.makeResident(*blitCommandStreamReceiver);
|
||||
timestampPacketContainer->makeResident(*blitCommandStreamReceiver);
|
||||
blitCommandStreamReceiver->blitBuffer(blitProperties);
|
||||
|
||||
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(commandStream, *currentTimestampPacketNode);
|
||||
|
||||
return blitProperties;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@ -809,7 +810,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
LinearStream &commandStream,
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
bool blitEnqueue,
|
||||
const EnqueueProperties &enqueueProperties,
|
||||
TimestampPacketContainer *previousTimestampPacketNodes,
|
||||
EventsRequest &eventsRequest,
|
||||
EventBuilder &eventBuilder,
|
||||
@ -826,11 +827,20 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
requiresCoherency |= surface->IsCoherent;
|
||||
}
|
||||
|
||||
if (enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
|
||||
UNRECOVERABLE_IF(!enqueueProperties.blitProperties);
|
||||
auto bcsCsr = getBcsCommandStreamReceiver();
|
||||
|
||||
previousTimestampPacketNodes->makeResident(*bcsCsr);
|
||||
timestampPacketContainer->makeResident(*bcsCsr);
|
||||
bcsCsr->blitBuffer(*enqueueProperties.blitProperties);
|
||||
}
|
||||
|
||||
DispatchFlags dispatchFlags = {};
|
||||
dispatchFlags.blocking = blocking;
|
||||
dispatchFlags.multiEngineQueue = multiEngineQueue;
|
||||
dispatchFlags.preemptionMode = device->getPreemptionMode();
|
||||
dispatchFlags.implicitFlush = blitEnqueue;
|
||||
dispatchFlags.implicitFlush = (enqueueProperties.operation == EnqueueProperties::Operation::Blit);
|
||||
dispatchFlags.guardCommandBufferWithPipeControl = true;
|
||||
dispatchFlags.outOfOrderExecutionAllowed = getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled();
|
||||
|
||||
|
@ -21,8 +21,6 @@ class LinearStream;
|
||||
class TimestampPacketContainer;
|
||||
|
||||
struct BlitProperties {
|
||||
BlitProperties() = delete;
|
||||
|
||||
static BlitProperties constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection blitDirection,
|
||||
CommandStreamReceiver &commandStreamReceiver,
|
||||
GraphicsAllocation *memObjAllocation, void *hostPtr, bool blocking,
|
||||
|
@ -15,6 +15,7 @@
|
||||
namespace NEO {
|
||||
class MemObj;
|
||||
class Buffer;
|
||||
struct BlitProperties;
|
||||
|
||||
enum QueueThrottle : uint32_t {
|
||||
LOW,
|
||||
@ -77,4 +78,49 @@ struct MapInfo {
|
||||
uint32_t mipLevel = 0;
|
||||
bool readOnly = false;
|
||||
};
|
||||
|
||||
struct EnqueueProperties {
|
||||
enum class Operation {
|
||||
Blit,
|
||||
ExplicitCacheFlush,
|
||||
EnqueueWithoutSubmission,
|
||||
DependencyResolveOnGpu,
|
||||
GpuKernel,
|
||||
};
|
||||
|
||||
EnqueueProperties() = delete;
|
||||
EnqueueProperties(bool blitEnqueue, bool hasKernels, bool isCacheFlushCmd, bool flushDependenciesOnly,
|
||||
const BlitProperties *blitProperties) {
|
||||
if (blitEnqueue) {
|
||||
operation = Operation::Blit;
|
||||
this->blitProperties = blitProperties;
|
||||
return;
|
||||
}
|
||||
|
||||
if (hasKernels) {
|
||||
operation = Operation::GpuKernel;
|
||||
return;
|
||||
}
|
||||
|
||||
if (isCacheFlushCmd) {
|
||||
operation = Operation::ExplicitCacheFlush;
|
||||
return;
|
||||
}
|
||||
|
||||
if (flushDependenciesOnly) {
|
||||
operation = Operation::DependencyResolveOnGpu;
|
||||
return;
|
||||
}
|
||||
|
||||
operation = Operation::EnqueueWithoutSubmission;
|
||||
}
|
||||
|
||||
bool isFlushWithoutKernelRequired() const {
|
||||
return (operation == Operation::Blit) || (operation == Operation::ExplicitCacheFlush) ||
|
||||
(operation == Operation::DependencyResolveOnGpu);
|
||||
}
|
||||
|
||||
const BlitProperties *blitProperties = nullptr;
|
||||
Operation operation = Operation::EnqueueWithoutSubmission;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "core/unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "runtime/event/event_builder.h"
|
||||
#include "runtime/event/user_event.h"
|
||||
#include "runtime/helpers/timestamp_packet.h"
|
||||
@ -43,7 +44,9 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedT
|
||||
Surface *surfaces[] = {surface.get()};
|
||||
auto blocking = true;
|
||||
TimestampPacketContainer previousTimestampPacketNodes;
|
||||
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, false, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
|
||||
|
||||
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId()), 1u);
|
||||
}
|
||||
|
||||
@ -57,6 +60,7 @@ struct DispatchFlagsTests : public ::testing::Test {
|
||||
|
||||
std::unique_ptr<MockDevice> device;
|
||||
std::unique_ptr<MockContext> context;
|
||||
DebugManagerStateRestore restore;
|
||||
};
|
||||
|
||||
HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispatchFlags) {
|
||||
@ -70,7 +74,9 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa
|
||||
TimestampPacketContainer previousTimestampPacketNodes;
|
||||
EventsRequest eventsRequest(0, nullptr, nullptr);
|
||||
EventBuilder eventBuilder;
|
||||
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, false, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
|
||||
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
|
||||
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
|
||||
EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking);
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush);
|
||||
@ -81,16 +87,32 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa
|
||||
|
||||
HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) {
|
||||
using CsrType = MockCsrHw2<FamilyType>;
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
SetUpImpl<CsrType>();
|
||||
|
||||
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
|
||||
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
|
||||
mockCsr->skipBlitCalls = true;
|
||||
mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
|
||||
|
||||
auto blocking = true;
|
||||
TimestampPacketContainer previousTimestampPacketNodes;
|
||||
EventsRequest eventsRequest(0, nullptr, nullptr);
|
||||
EventBuilder eventBuilder;
|
||||
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, true, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
BuiltinOpParams builtinOpParams;
|
||||
builtinOpParams.srcMemObj = buffer.get();
|
||||
builtinOpParams.dstPtr = reinterpret_cast<void *>(0x1234);
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.setBuiltinOpParams(builtinOpParams);
|
||||
|
||||
mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, true);
|
||||
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest,
|
||||
mockCmdQ->getCS(0), 0);
|
||||
|
||||
EnqueueProperties enqueueProperties(true, false, false, false, &blitProperties);
|
||||
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
|
||||
EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush);
|
||||
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
|
||||
@ -98,23 +120,40 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
|
||||
|
||||
HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowOutOfOrderExecution) {
|
||||
using CsrType = MockCsrHw2<FamilyType>;
|
||||
DebugManager.flags.EnableTimestampPacket.set(1);
|
||||
|
||||
SetUpImpl<CsrType>();
|
||||
|
||||
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
|
||||
auto mockCsr = static_cast<CsrType *>(&mockCmdQ->getGpgpuCommandStreamReceiver());
|
||||
mockCsr->skipBlitCalls = true;
|
||||
mockCmdQ->bcsEngine = mockCmdQ->gpgpuEngine;
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
|
||||
|
||||
TimestampPacketContainer previousTimestampPacketNodes;
|
||||
EventsRequest eventsRequest(0, nullptr, nullptr);
|
||||
EventBuilder eventBuilder;
|
||||
|
||||
bool blocked = false;
|
||||
BuiltinOpParams builtinOpParams;
|
||||
builtinOpParams.srcMemObj = buffer.get();
|
||||
builtinOpParams.dstPtr = reinterpret_cast<void *>(0x1234);
|
||||
MultiDispatchInfo multiDispatchInfo;
|
||||
multiDispatchInfo.setBuiltinOpParams(builtinOpParams);
|
||||
|
||||
mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, true);
|
||||
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest,
|
||||
mockCmdQ->getCS(0), 0);
|
||||
EnqueueProperties enqueueProperties(true, false, false, false, &blitProperties);
|
||||
enqueueProperties.blitProperties = &blitProperties;
|
||||
|
||||
mockCsr->nTo1SubmissionModelEnabled = false;
|
||||
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, true, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
|
||||
|
||||
mockCsr->nTo1SubmissionModelEnabled = true;
|
||||
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, true, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
|
||||
EXPECT_TRUE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
|
||||
}
|
||||
|
||||
|
@ -213,6 +213,13 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
void blitBuffer(const BlitProperties &blitProperites) override {
|
||||
if (!skipBlitCalls) {
|
||||
CommandStreamReceiverHw<GfxFamily>::blitBuffer(blitProperites);
|
||||
}
|
||||
}
|
||||
|
||||
bool skipBlitCalls = false;
|
||||
bool storeFlushedTaskStream = false;
|
||||
std::unique_ptr<uint8_t> storedTaskStream;
|
||||
size_t storedTaskStreamSize = 0;
|
||||
|
Reference in New Issue
Block a user