mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
Initial Blit aux translation support
Change-Id: I67fb71be57cff28a3736d5ffb9e1c39b2498feb8 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
533cf92d12
commit
46b5513028
@@ -62,12 +62,16 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
|
||||
protected:
|
||||
using RegisteredMethodDispatcherT = RegisteredMethodDispatcher<DispatchInfo::DispatchCommandMethodT,
|
||||
DispatchInfo::EstimateCommandsMethodT>;
|
||||
|
||||
template <typename GfxFamily, bool dcFlush>
|
||||
static void dispatchPipeControl(LinearStream &linearStream) {
|
||||
static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *) {
|
||||
PipeControlHelper<GfxFamily>::addPipeControl(linearStream, dcFlush);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getSizeForSinglePipeControl(const MemObjsForAuxTranslation *) {
|
||||
return PipeControlHelper<GfxFamily>::getSizeForSinglePipeControl();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void registerPipeControlProgramming(RegisteredMethodDispatcherT &dispatcher, bool dcFlush) const {
|
||||
if (dcFlush) {
|
||||
@@ -75,7 +79,7 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
|
||||
} else {
|
||||
dispatcher.registerMethod(this->dispatchPipeControl<GfxFamily, false>);
|
||||
}
|
||||
dispatcher.registerCommandsSizeEstimationMethod(PipeControlHelper<GfxFamily>::getSizeForSinglePipeControl);
|
||||
dispatcher.registerCommandsSizeEstimationMethod(this->getSizeForSinglePipeControl<GfxFamily>);
|
||||
}
|
||||
|
||||
void resizeKernelInstances(size_t size) const;
|
||||
|
||||
@@ -334,6 +334,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
const EnqueueProperties &enqueueProperties,
|
||||
TimestampPacketDependencies &timestampPacketDependencies,
|
||||
EventsRequest &eventsRequest,
|
||||
EventBuilder &eventBuilder,
|
||||
@@ -391,6 +392,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
const cl_event *eventWaitList, cl_event *event);
|
||||
|
||||
MOCKABLE_VIRTUAL void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection);
|
||||
void setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo);
|
||||
|
||||
MOCKABLE_VIRTUAL bool forceStateless(size_t size);
|
||||
|
||||
@@ -420,6 +422,10 @@ class CommandQueueHw : public CommandQueue {
|
||||
return commandStream;
|
||||
}
|
||||
|
||||
void processDispatchForBlitAuxTranslation(const MultiDispatchInfo &multiDispatchInfo, BlitPropertiesContainer &blitPropertiesContainer,
|
||||
TimestampPacketDependencies &timestampPacketDependencies, const EventsRequest &eventsRequest,
|
||||
bool queueBlocked);
|
||||
|
||||
private:
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
|
||||
|
||||
@@ -122,4 +122,19 @@ bool CommandQueueHw<Family>::forceStateless(size_t size) {
|
||||
return size >= 4ull * MemoryConstants::gigaByte;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void CommandQueueHw<Family>::setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {
|
||||
multiDispatchInfo.begin()->dispatchInitCommands.registerMethod(
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation<Family, AuxTranslationDirection::AuxToNonAux>);
|
||||
|
||||
multiDispatchInfo.begin()->dispatchInitCommands.registerCommandsSizeEstimationMethod(
|
||||
TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family>);
|
||||
|
||||
multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerMethod(
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation<Family, AuxTranslationDirection::NonAuxToAux>);
|
||||
|
||||
multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerCommandsSizeEstimationMethod(
|
||||
TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family>);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -91,6 +91,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
}
|
||||
}
|
||||
|
||||
if (HwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(multiDispatchInfo)) {
|
||||
setupBlitAuxTranslation(multiDispatchInfo);
|
||||
}
|
||||
|
||||
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
@@ -219,8 +223,14 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
blockedCommandsData, surfacesForResidency, numSurfaceForResidency);
|
||||
auto commandStreamStart = commandStream.getUsed();
|
||||
|
||||
if (HwHelperHw<GfxFamily>::isBlitAuxTranslationRequired(multiDispatchInfo)) {
|
||||
processDispatchForBlitAuxTranslation(multiDispatchInfo, blitPropertiesContainer, timestampPacketDependencies,
|
||||
eventsRequest, blockQueue);
|
||||
}
|
||||
|
||||
if (eventBuilder.getEvent() && getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(timestampPacketDependencies.nonAuxToAuxNodes);
|
||||
}
|
||||
|
||||
bool flushDependenciesForNonKernelCommand = false;
|
||||
@@ -273,6 +283,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
commandStreamStart,
|
||||
blocking,
|
||||
multiDispatchInfo,
|
||||
enqueueProperties,
|
||||
timestampPacketDependencies,
|
||||
eventsRequest,
|
||||
eventBuilder,
|
||||
@@ -461,6 +472,51 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
|
||||
return blitProperties;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDispatchForBlitAuxTranslation(const MultiDispatchInfo &multiDispatchInfo,
|
||||
BlitPropertiesContainer &blitPropertiesContainer,
|
||||
TimestampPacketDependencies ×tampPacketDependencies,
|
||||
const EventsRequest &eventsRequest, bool queueBlocked) {
|
||||
auto nodesAllocator = getGpgpuCommandStreamReceiver().getTimestampPacketAllocator();
|
||||
auto numBuffers = multiDispatchInfo.getMemObjsForAuxTranslation()->size();
|
||||
blitPropertiesContainer.resize(numBuffers * 2);
|
||||
|
||||
auto bufferIndex = 0;
|
||||
for (auto &buffer : *multiDispatchInfo.getMemObjsForAuxTranslation()) {
|
||||
{
|
||||
// Aux to NonAux
|
||||
blitPropertiesContainer[bufferIndex] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::AuxToNonAux,
|
||||
buffer->getGraphicsAllocation());
|
||||
auto auxToNonAuxNode = nodesAllocator->getTag();
|
||||
timestampPacketDependencies.auxToNonAuxNodes.add(auxToNonAuxNode);
|
||||
blitPropertiesContainer[bufferIndex].outputTimestampPacket = auxToNonAuxNode;
|
||||
}
|
||||
|
||||
{
|
||||
// NonAux to Aux
|
||||
blitPropertiesContainer[bufferIndex + numBuffers] = BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection::NonAuxToAux,
|
||||
buffer->getGraphicsAllocation());
|
||||
auto nonAuxToAuxNode = nodesAllocator->getTag();
|
||||
timestampPacketDependencies.nonAuxToAuxNodes.add(nonAuxToAuxNode);
|
||||
blitPropertiesContainer[bufferIndex + numBuffers].outputTimestampPacket = nonAuxToAuxNode;
|
||||
}
|
||||
bufferIndex++;
|
||||
}
|
||||
|
||||
if (!queueBlocked) {
|
||||
getGpgpuCommandStreamReceiver().requestStallingPipeControlOnNextFlush();
|
||||
timestampPacketDependencies.barrierNodes.add(nodesAllocator->getTag());
|
||||
|
||||
// wait for barrier and events before AuxToNonAux
|
||||
blitPropertiesContainer[0].csrDependencies.push_back(×tampPacketDependencies.barrierNodes);
|
||||
blitPropertiesContainer[0].csrDependencies.fillFromEventsRequest(eventsRequest, *getBcsCommandStreamReceiver(),
|
||||
CsrDependencies::DependenciesType::All);
|
||||
|
||||
// wait for NDR before NonAuxToAux
|
||||
blitPropertiesContainer[numBuffers].csrDependencies.push_back(this->timestampPacketContainer.get());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
|
||||
size_t numSurfaces,
|
||||
@@ -578,6 +634,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
size_t commandStreamStart,
|
||||
bool &blocking,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
const EnqueueProperties &enqueueProperties,
|
||||
TimestampPacketDependencies &timestampPacketDependencies,
|
||||
EventsRequest &eventsRequest,
|
||||
EventBuilder &eventBuilder,
|
||||
@@ -669,7 +726,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
|
||||
DispatchFlags dispatchFlags(
|
||||
{}, //csrDependencies
|
||||
nullptr, //barrierTimestampPacketNodes
|
||||
&timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
|
||||
{}, //pipelineSelectArgs
|
||||
this->flushStamp->getStampReference(), //flushStampReference
|
||||
getThrottle(), //throttle
|
||||
@@ -715,6 +772,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
gtpinNotifyPreFlushTask(this);
|
||||
}
|
||||
|
||||
if (enqueueProperties.blitPropertiesContainer->size() > 0) {
|
||||
this->bcsTaskCount = getBcsCommandStreamReceiver()->blitBuffer(*enqueueProperties.blitPropertiesContainer, false);
|
||||
}
|
||||
|
||||
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stdout, "preemption = %d.\n", static_cast<int>(dispatchFlags.preemptionMode));
|
||||
CompletionStamp completionStamp = getGpgpuCommandStreamReceiver().flushTask(
|
||||
commandStream,
|
||||
|
||||
@@ -199,8 +199,8 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel());
|
||||
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize();
|
||||
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize();
|
||||
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(multiDispatchInfo.getMemObjsForAuxTranslation());
|
||||
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(multiDispatchInfo.getMemObjsForAuxTranslation());
|
||||
}
|
||||
if (parentKernel) {
|
||||
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());
|
||||
|
||||
@@ -82,7 +82,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
|
||||
size_t currentDispatchIndex = 0;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.dispatchInitCommands(*commandStream);
|
||||
dispatchInfo.dispatchInitCommands(*commandStream, timestampPacketDependencies);
|
||||
bool isMainKernel = (dispatchInfo.getKernel() == mainKernel);
|
||||
|
||||
dispatchKernelCommands(commandQueue, dispatchInfo, commandType, *commandStream, isMainKernel,
|
||||
@@ -90,7 +90,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
offsetInterfaceDescriptorTable, *dsh, *ioh, *ssh);
|
||||
|
||||
currentDispatchIndex++;
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream);
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream, timestampPacketDependencies);
|
||||
}
|
||||
if (mainKernel->requiresCacheFlushCommand(commandQueue)) {
|
||||
uint64_t postSyncAddress = 0;
|
||||
|
||||
@@ -20,12 +20,13 @@
|
||||
namespace NEO {
|
||||
|
||||
class Kernel;
|
||||
struct TimestampPacketDependencies;
|
||||
|
||||
class DispatchInfo {
|
||||
|
||||
public:
|
||||
using DispatchCommandMethodT = void(LinearStream &commandStream);
|
||||
using EstimateCommandsMethodT = size_t(void);
|
||||
using DispatchCommandMethodT = void(LinearStream &commandStream, TimestampPacketDependencies *timestampPacketDependencies);
|
||||
using EstimateCommandsMethodT = size_t(const MemObjsForAuxTranslation *);
|
||||
|
||||
DispatchInfo() = default;
|
||||
DispatchInfo(Kernel *kernel, uint32_t dim, Vec3<size_t> gws, Vec3<size_t> elws, Vec3<size_t> offset)
|
||||
|
||||
@@ -30,6 +30,7 @@ struct EnqueueProperties {
|
||||
|
||||
if (hasKernels) {
|
||||
operation = Operation::GpuKernel;
|
||||
this->blitPropertiesContainer = blitPropertiesContainer;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -94,6 +94,8 @@ class TimestampPacketContainer : public NonCopyableClass {
|
||||
struct TimestampPacketDependencies : public NonCopyableClass {
|
||||
TimestampPacketContainer previousEnqueueNodes;
|
||||
TimestampPacketContainer barrierNodes;
|
||||
TimestampPacketContainer auxToNonAuxNodes;
|
||||
TimestampPacketContainer nonAuxToAuxNodes;
|
||||
};
|
||||
|
||||
struct TimestampPacketHelper {
|
||||
@@ -121,6 +123,23 @@ struct TimestampPacketHelper {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, AuxTranslationDirection auxTranslationDirection>
|
||||
static void programSemaphoreWithImplicitDependencyForAuxTranslation(LinearStream &cmdStream,
|
||||
const TimestampPacketDependencies *timestampPacketDependencies) {
|
||||
auto &container = (auxTranslationDirection == AuxTranslationDirection::AuxToNonAux)
|
||||
? timestampPacketDependencies->auxToNonAuxNodes
|
||||
: timestampPacketDependencies->nonAuxToAuxNodes;
|
||||
|
||||
for (auto &node : container.peekNodes()) {
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForAuxTranslationNodeDependency(const MemObjsForAuxTranslation *memObjsForAuxTranslation) {
|
||||
return memObjsForAuxTranslation->size() * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<GfxFamily>();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForNodeDependency() {
|
||||
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC);
|
||||
@@ -136,4 +155,5 @@ struct TimestampPacketHelper {
|
||||
return totalNodesCount * getRequiredCmdStreamSizeForNodeDependency<GfxFamily>();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user