mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
fix: Improved event profiling setup for bcs split enqueue
Fixes OpenCL Khronos failures in test profiling with bcs split Related-To: NEO-8927 Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
44c23cb8be
commit
bec9874487
@@ -386,6 +386,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
|
||||
bool getHeaplessModeEnabled() const { return this->heaplessModeEnabled; }
|
||||
|
||||
bool isBcsSplitInitialized() const { return this->bcsSplitInitialized; }
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
||||
|
||||
@@ -364,7 +364,7 @@ class CommandQueueHw : public CommandQueue {
|
||||
cl_int dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dispatchInfo, Surface *(&surfaces)[surfaceCount], EBuiltInOps::Type builtInOperation, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &csr);
|
||||
|
||||
template <uint32_t cmdType>
|
||||
cl_int enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr);
|
||||
cl_int enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr, EventBuilder *pExternalEventBuilder);
|
||||
|
||||
bool isSplitEnqueueBlitNeeded(TransferDirection transferDirection, size_t transferSize, CommandStreamReceiver &csr);
|
||||
size_t getTotalSizeFromRectRegion(const size_t *region);
|
||||
|
||||
@@ -1108,7 +1108,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
csrDeps.makeResident(getGpgpuCommandStreamReceiver());
|
||||
}
|
||||
|
||||
if (eventBuilder.getEvent() && isProfilingEnabled()) {
|
||||
if (eventBuilder.getEvent() && isProfilingEnabled() && !isBcsSplitInitialized()) {
|
||||
eventBuilder.getEvent()->setSubmitTimeStamp();
|
||||
eventBuilder.getEvent()->setStartTimeStamp();
|
||||
}
|
||||
@@ -1309,6 +1309,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
|
||||
auto size = dispatchInfo.peekBuiltinOpParams().size.x;
|
||||
auto remainingSize = size;
|
||||
|
||||
EventBuilder externalEventBuilder;
|
||||
EventBuilder *pEventBuilder = nullptr;
|
||||
DEBUG_BREAK_IF(!this->isBcsSplitInitialized());
|
||||
if (event && this->isProfilingEnabled()) {
|
||||
pEventBuilder = &externalEventBuilder;
|
||||
setupEvent(*pEventBuilder, event, cmdType);
|
||||
castToObjectOrAbort<Event>(*event)->setSubmitTimeStamp();
|
||||
castToObjectOrAbort<Event>(*event)->setStartTimeStamp();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < copyEngines.size(); i++) {
|
||||
auto localSize = remainingSize / (copyEngines.size() - i);
|
||||
auto localParams = dispatchInfo.peekBuiltinOpParams();
|
||||
@@ -1321,7 +1331,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
|
||||
|
||||
this->timestampPacketContainer->assignAndIncrementNodesRefCounts(previousEnqueueNode);
|
||||
|
||||
ret = enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, remainingSize == 0 ? event : nullptr, false, *copyEngines[i]);
|
||||
ret = enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, remainingSize == 0 ? event : nullptr, false, *copyEngines[i], pEventBuilder);
|
||||
DEBUG_BREAK_IF(ret != CL_SUCCESS);
|
||||
|
||||
this->timestampPacketContainer->moveNodesToNewContainer(splitNodes);
|
||||
@@ -1349,16 +1359,23 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <uint32_t cmdType>
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr) {
|
||||
cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDispatchInfo, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event, bool blocking, CommandStreamReceiver &bcsCsr, EventBuilder *pExternalEventBuilder) {
|
||||
auto bcsCommandStreamReceiverOwnership = bcsCsr.obtainUniqueOwnership();
|
||||
std::unique_lock<NEO::CommandStreamReceiver::MutexType> commandStreamReceiverOwnership;
|
||||
|
||||
registerBcsCsrClient(bcsCsr);
|
||||
|
||||
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
|
||||
EventBuilder eventBuilder;
|
||||
EventBuilder internalEventBuilder;
|
||||
EventBuilder *pEventBuilder = nullptr;
|
||||
|
||||
setupEvent(eventBuilder, eventsRequest.outEvent, cmdType);
|
||||
if (pExternalEventBuilder) {
|
||||
DEBUG_BREAK_IF(!this->isBcsSplitInitialized() || !this->isProfilingEnabled());
|
||||
pEventBuilder = pExternalEventBuilder;
|
||||
} else {
|
||||
pEventBuilder = &internalEventBuilder;
|
||||
setupEvent(*pEventBuilder, eventsRequest.outEvent, cmdType);
|
||||
}
|
||||
eventsRequest.setupBcsCsrForOutputEvent(bcsCsr);
|
||||
|
||||
std::unique_ptr<KernelOperation> blockedCommandsData;
|
||||
@@ -1412,11 +1429,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
||||
csrDeps.timestampPacketContainer.push_back(×tampPacketDependencies.previousEnqueueNodes);
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
if (pEventBuilder->getEvent()) {
|
||||
pEventBuilder->getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
|
||||
}
|
||||
if (eventBuilder.getEvent() && eventBuilder.getEvent()->getContext()->getRootDeviceIndices().size() > 1) {
|
||||
multiRootEventSyncStamp = eventBuilder.getEvent()->getMultiRootTimestampSyncNode();
|
||||
if (pEventBuilder->getEvent() && pEventBuilder->getEvent()->getContext()->getRootDeviceIndices().size() > 1) {
|
||||
multiRootEventSyncStamp = pEventBuilder->getEvent()->getMultiRootTimestampSyncNode();
|
||||
bcsCsr.makeResident(*multiRootEventSyncStamp->getBaseGraphicsAllocation());
|
||||
}
|
||||
|
||||
@@ -1442,7 +1459,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
if (!blockQueue) {
|
||||
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,
|
||||
enqueueProperties, timestampPacketDependencies, eventsRequest,
|
||||
eventBuilder, taskLevel, csrDeps, &bcsCsr, false);
|
||||
*pEventBuilder, taskLevel, csrDeps, &bcsCsr, false);
|
||||
if (completionStamp.taskCount > CompletionStamp::notReady) {
|
||||
return CommandQueue::getErrorCodeFromTaskCount(completionStamp.taskCount);
|
||||
}
|
||||
@@ -1453,18 +1470,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
||||
}
|
||||
}
|
||||
|
||||
if (eventBuilder.getEvent()) {
|
||||
eventBuilder.getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
|
||||
if (pEventBuilder->getEvent()) {
|
||||
pEventBuilder->getEvent()->flushStamp->replaceStampObject(this->flushStamp->getStampReference());
|
||||
}
|
||||
|
||||
this->latestSentEnqueueType = enqueueProperties.operation;
|
||||
|
||||
setLastBcsPacket(bcsCsr.getOsContext().getEngineType());
|
||||
}
|
||||
updateFromCompletionStamp(completionStamp, eventBuilder.getEvent());
|
||||
updateFromCompletionStamp(completionStamp, pEventBuilder->getEvent());
|
||||
|
||||
if (blockQueue) {
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, eventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp);
|
||||
enqueueBlocked(cmdType, nullptr, 0, multiDispatchInfo, timestampPacketDependencies, blockedCommandsData, enqueueProperties, eventsRequest, *pEventBuilder, nullptr, &bcsCsr, multiRootEventSyncStamp);
|
||||
|
||||
if (gpgpuSubmission) {
|
||||
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() == 1) {
|
||||
@@ -1510,7 +1527,7 @@ cl_int CommandQueueHw<GfxFamily>::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &d
|
||||
if (dispatchInfo.peekBuiltinOpParams().bcsSplit) {
|
||||
ret = enqueueBlitSplit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
} else {
|
||||
ret = enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr);
|
||||
ret = enqueueBlit<cmdType>(dispatchInfo, numEventsInWaitList, eventWaitList, event, blocking, csr, nullptr);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
Reference in New Issue
Block a user