Initial Blit aux translation support

Change-Id: I67fb71be57cff28a3736d5ffb9e1c39b2498feb8
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2019-11-09 19:02:25 +01:00
committed by sys_ocldev
parent 533cf92d12
commit 46b5513028
17 changed files with 605 additions and 11 deletions

View File

@@ -62,12 +62,16 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
protected:
using RegisteredMethodDispatcherT = RegisteredMethodDispatcher<DispatchInfo::DispatchCommandMethodT,
DispatchInfo::EstimateCommandsMethodT>;
template <typename GfxFamily, bool dcFlush>
static void dispatchPipeControl(LinearStream &linearStream) {
static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *) {
PipeControlHelper<GfxFamily>::addPipeControl(linearStream, dcFlush);
}
// Size-estimation counterpart of dispatchPipeControl: reports the command stream space
// needed for one pipe control.
// The aux-translation object list parameter exists only so the function matches the
// estimator signature (size_t(const MemObjsForAuxTranslation *)); it is unnamed and never read.
template <typename GfxFamily>
static size_t getSizeForSinglePipeControl(const MemObjsForAuxTranslation *) {
    const size_t singlePipeControlSize = PipeControlHelper<GfxFamily>::getSizeForSinglePipeControl();
    return singlePipeControlSize;
}
template <typename GfxFamily>
void registerPipeControlProgramming(RegisteredMethodDispatcherT &dispatcher, bool dcFlush) const {
if (dcFlush) {
@@ -75,7 +79,7 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
} else {
dispatcher.registerMethod(this->dispatchPipeControl<GfxFamily, false>);
}
dispatcher.registerCommandsSizeEstimationMethod(PipeControlHelper<GfxFamily>::getSizeForSinglePipeControl);
dispatcher.registerCommandsSizeEstimationMethod(this->getSizeForSinglePipeControl<GfxFamily>);
}
void resizeKernelInstances(size_t size) const;

View File

@@ -334,6 +334,7 @@ class CommandQueueHw : public CommandQueue {
size_t commandStreamStart,
bool &blocking,
const MultiDispatchInfo &multiDispatchInfo,
const EnqueueProperties &enqueueProperties,
TimestampPacketDependencies &timestampPacketDependencies,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
@@ -391,6 +392,7 @@ class CommandQueueHw : public CommandQueue {
const cl_event *eventWaitList, cl_event *event);
MOCKABLE_VIRTUAL void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection);
void setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo);
MOCKABLE_VIRTUAL bool forceStateless(size_t size);
@@ -420,6 +422,10 @@ class CommandQueueHw : public CommandQueue {
return commandStream;
}
void processDispatchForBlitAuxTranslation(const MultiDispatchInfo &multiDispatchInfo, BlitPropertiesContainer &blitPropertiesContainer,
TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest,
bool queueBlocked);
private:
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;

View File

@@ -122,4 +122,19 @@ bool CommandQueueHw<Family>::forceStateless(size_t size) {
return size >= 4ull * MemoryConstants::gigaByte;
}
// Registers the command-stream hooks used by blit-based aux translation:
//  - the first dispatch gets init commands for the AuxToNonAux direction,
//  - the last dispatch gets epilogue commands for the NonAuxToAux direction.
// Both hooks share the same size estimator, which scales with the number of objects
// selected for aux translation.
// NOTE(review): begin()/rbegin() are dereferenced without an emptiness check - presumably
// callers only invoke this with at least one dispatch; confirm.
template <typename Family>
void CommandQueueHw<Family>::setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {
    auto &firstDispatchInitCommands = multiDispatchInfo.begin()->dispatchInitCommands;
    firstDispatchInitCommands.registerMethod(
        TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation<Family, AuxTranslationDirection::AuxToNonAux>);
    firstDispatchInitCommands.registerCommandsSizeEstimationMethod(
        TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family>);

    auto &lastDispatchEpilogueCommands = multiDispatchInfo.rbegin()->dispatchEpilogueCommands;
    lastDispatchEpilogueCommands.registerMethod(
        TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation<Family, AuxTranslationDirection::NonAuxToAux>);
    lastDispatchEpilogueCommands.registerCommandsSizeEstimationMethod(
        TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family>);
}
} // namespace NEO

View File

@@ -91,6 +91,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
}
}
if (HwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(multiDispatchInfo)) {
setupBlitAuxTranslation(multiDispatchInfo);
}
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
}
@@ -219,8 +223,14 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
auto commandStreamStart = commandStream.getUsed();
if (HwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(multiDispatchInfo)) {
processDispatchForBlitAuxTranslation(multiDispatchInfo, blitPropertiesContainer, timestampPacketDependencies,
eventsRequest, blockQueue);
}
if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes);
}
bool flushDependenciesForNonKernelCommand = false;
@@ -273,6 +283,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
commandStreamStart,
blocking,
multiDispatchInfo,
enqueueProperties,
timestampPacketDependencies,
eventsRequest,
eventBuilder,
@@ -461,6 +472,51 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
return blitProperties;
}
// Builds the blit descriptors and timestamp packet nodes needed to translate every object
// selected for aux translation around a kernel dispatch.
//
// Layout of blitPropertiesContainer after the call (N = number of objects):
//   [0, N)   - AuxToNonAux blits, one per object
//   [N, 2N)  - NonAuxToAux blits, one per object
// Each blit gets a freshly obtained tag as its output timestamp packet; the tag is also
// recorded in timestampPacketDependencies.auxToNonAuxNodes / nonAuxToAuxNodes respectively.
//
// When the queue is not blocked, a stalling pipe control is requested on the GPGPU CSR,
// a barrier node is added, and dependencies are attached to the first blit of each half.
//
// multiDispatchInfo            - source of the object list (getMemObjsForAuxTranslation()).
// blitPropertiesContainer      - output; resized to 2 * N and filled.
// timestampPacketDependencies  - output; receives the per-direction nodes and the barrier node.
// eventsRequest                - events the first AuxToNonAux blit must wait for.
// queueBlocked                 - when true, no barrier or dependencies are set up here.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(const MultiDispatchInfo &multiDispatchInfo,
                                                                     BlitPropertiesContainer &blitPropertiesContainer,
                                                                     TimestampPacketDependencies &timestampPacketDependencies,
                                                                     const EventsRequest &eventsRequest, bool queueBlocked) {
    auto nodesAllocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
    auto &memObjsForAuxTranslation = *multiDispatchInfo.getMemObjsForAuxTranslation();
    const size_t numBuffers = memObjsForAuxTranslation.size();
    blitPropertiesContainer.resize(numBuffers * 2);

    if (numBuffers == 0) {
        // Nothing to translate: there are no blits to order, and indexing the (empty)
        // container below would be out of bounds.
        return;
    }

    // size_t index: it is combined with numBuffers (unsigned) when addressing the second half.
    size_t bufferIndex = 0;
    for (auto &buffer : memObjsForAuxTranslation) {
        {
            // Aux to NonAux
            auto &auxToNonAuxProperties = blitPropertiesContainer[bufferIndex];
            auxToNonAuxProperties = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux,
                                                                                         buffer->getGraphicsAllocation());
            auto auxToNonAuxNode = nodesAllocator->getTag();
            timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode);
            auxToNonAuxProperties.outputTimestampPacket = auxToNonAuxNode;
        }

        {
            // NonAux to Aux
            auto &nonAuxToAuxProperties = blitPropertiesContainer[bufferIndex + numBuffers];
            nonAuxToAuxProperties = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux,
                                                                                         buffer->getGraphicsAllocation());
            auto nonAuxToAuxNode = nodesAllocator->getTag();
            timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode);
            nonAuxToAuxProperties.outputTimestampPacket = nonAuxToAuxNode;
        }
        bufferIndex++;
    }

    if (!queueBlocked) {
        getGpgpuCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
        timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());

        // wait for barrier and events before AuxToNonAux
        auto &firstAuxToNonAuxBlit = blitPropertiesContainer[0];
        firstAuxToNonAuxBlit.csrDependencies.push_back(&timestampPacketDependencies.barrierNodes);
        firstAuxToNonAuxBlit.csrDependencies.fillFromEventsRequest(eventsRequest, *getBcsCommandStreamReceiver(),
                                                                   CsrDependencies::DependenciesType::All);

        // wait for NDR before NonAuxToAux
        blitPropertiesContainer[numBuffers].csrDependencies.push_back(this->timestampPacketContainer.get());
    }
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
size_t numSurfaces,
@@ -578,6 +634,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
size_t commandStreamStart,
bool &blocking,
const MultiDispatchInfo &multiDispatchInfo,
const EnqueueProperties &enqueueProperties,
TimestampPacketDependencies &timestampPacketDependencies,
EventsRequest &eventsRequest,
EventBuilder &eventBuilder,
@@ -669,7 +726,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
DispatchFlags dispatchFlags(
{}, //csrDependencies
nullptr, //barrierTimestampPacketNodes
&timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
{}, //pipelineSelectArgs
this->flushStamp->getStampReference(), //flushStampReference
getThrottle(), //throttle
@@ -715,6 +772,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
gtpinNotifyPreFlushTask(this);
}
if (enqueueProperties.blitPropertiesContainer->size() > 0) {
this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false);
}
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast<int>(dispatchFlags.preemptionMode));
CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask(
commandStream,

View File

@@ -199,8 +199,8 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
for (auto &dispatchInfo : multiDispatchInfo) {
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize();
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize();
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(multiDispatchInfo.getMemObjsForAuxTranslation());
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(multiDispatchInfo.getMemObjsForAuxTranslation());
}
if (parentKernel) {
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());

View File

@@ -82,7 +82,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
size_t currentDispatchIndex = 0;
for (auto &dispatchInfo : multiDispatchInfo) {
dispatchInfo.dispatchInitCommands(*commandStream);
dispatchInfo.dispatchInitCommands(*commandStream, timestampPacketDependencies);
bool isMainKernel = (dispatchInfo.getKernel() == mainKernel);
dispatchKernelCommands(commandQueue, dispatchInfo, commandType, *commandStream, isMainKernel,
@@ -90,7 +90,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
offsetInterfaceDescriptorTable, *dsh, *ioh, *ssh);
currentDispatchIndex++;
dispatchInfo.dispatchEpilogueCommands(*commandStream);
dispatchInfo.dispatchEpilogueCommands(*commandStream, timestampPacketDependencies);
}
if (mainKernel->requiresCacheFlushCommand(commandQueue)) {
uint64_t postSyncAddress = 0;

View File

@@ -20,12 +20,13 @@
namespace NEO {
class Kernel;
struct TimestampPacketDependencies;
class DispatchInfo {
public:
using DispatchCommandMethodT = void(LinearStream &commandStream);
using EstimateCommandsMethodT = size_t(void);
using DispatchCommandMethodT = void(LinearStream &commandStream, TimestampPacketDependencies *timestampPacketDependencies);
using EstimateCommandsMethodT = size_t(const MemObjsForAuxTranslation *);
DispatchInfo() = default;
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)

View File

@@ -30,6 +30,7 @@ struct EnqueueProperties {
if (hasKernels) {
operation = Operation::GpuKernel;
this->blitPropertiesContainer = blitPropertiesContainer;
return;
}

View File

@@ -94,6 +94,8 @@ class TimestampPacketContainer : public NonCopyableClass {
// Bundle of timestamp packet node containers that is threaded through the enqueue path
// and handed to the per-dispatch init/epilogue command hooks.
struct TimestampPacketDependencies : public NonCopyableClass {
// NOTE(review): presumably nodes belonging to the preceding enqueue - not shown here; confirm.
TimestampPacketContainer previousEnqueueNodes;
// Passed to DispatchFlags as barrierTimestampPacketNodes; blit aux translation adds a node
// here and makes the first AuxToNonAux blit depend on this container.
TimestampPacketContainer barrierNodes;
// One node per translated object, used as the output timestamp packet of its AuxToNonAux blit.
TimestampPacketContainer auxToNonAuxNodes;
// One node per translated object, used as the output timestamp packet of its NonAuxToAux blit.
TimestampPacketContainer nonAuxToAuxNodes;
};
struct TimestampPacketHelper {
@@ -121,6 +123,23 @@ struct TimestampPacketHelper {
}
}
// Programs a semaphore with implicit dependency for every node of the aux-translation
// container that matches the compile-time direction:
//   AuxToNonAux -> timestampPacketDependencies->auxToNonAuxNodes
//   otherwise   -> timestampPacketDependencies->nonAuxToAuxNodes
// timestampPacketDependencies is dereferenced unconditionally and must not be null.
template <typename GfxFamily, AuxTranslationDirection auxTranslationDirection>
static void programSemaphoreWithImplicitDependencyForAuxTranslation(LinearStream &cmdStream,
                                                                    const TimestampPacketDependencies *timestampPacketDependencies) {
    constexpr bool isAuxToNonAux = (auxTranslationDirection == AuxTranslationDirection::AuxToNonAux);

    const TimestampPacketContainer &nodesToWaitFor = isAuxToNonAux
                                                         ? timestampPacketDependencies->auxToNonAuxNodes
                                                         : timestampPacketDependencies->nonAuxToAuxNodes;

    for (auto &dependencyNode : nodesToWaitFor.peekNodes()) {
        TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *dependencyNode);
    }
}
// Estimates the command stream space for the aux-translation node dependencies of one
// direction: the per-node dependency size multiplied by the number of objects in the list.
// memObjsForAuxTranslation is dereferenced unconditionally and must not be null.
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForAuxTranslationNodeDependency(const MemObjsForAuxTranslation *memObjsForAuxTranslation) {
    const size_t objectsCount = memObjsForAuxTranslation->size();
    const size_t sizePerNode = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<GfxFamily>();
    return objectsCount * sizePerNode;
}
template <typename GfxFamily>
static size_t getRequiredCmdStreamSizeForNodeDependency() {
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC);
@@ -136,4 +155,5 @@ struct TimestampPacketHelper {
return totalNodesCount * getRequiredCmdStreamSizeForNodeDependency<GfxFamily>();
}
};
} // namespace NEO