Feature(OCL) Use tag nodes for root device synchronization

With this commit, events created in multi-root-device contexts
synchronize using signaled TagNodes instead of taskCounts.

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>

Related-To: NEO-7105
This commit is contained in:
Maciej Plewka
2022-12-07 11:07:43 +00:00
committed by Compute-Runtime-Automation
parent 6fac234655
commit 547d1c37b3
62 changed files with 995 additions and 210 deletions

View File

@@ -20,7 +20,6 @@ namespace NEO {
// Helper called by the EventsRequest dependency-filling routines when a dependent
// CSR reports that its latest task count has not been flushed.
//   dependentCsr - command stream receiver the current submission depends on.
//   csrDeps      - dependency record that receives the task-count entry below.
// NOTE(review): this listing looks like a diff rendered without +/- markers; the
// taskCountContainer push_back below may be a line the change removes — confirm
// against the actual file before relying on it.
void flushDependentCsr(CommandStreamReceiver &dependentCsr, CsrDependencies &csrDeps) {
// The ownership object is kept in a local, so it lives until this function returns
// (presumably a scoped lock on the CSR — TODO confirm).
auto csrOwnership = dependentCsr.obtainUniqueOwnership();
dependentCsr.updateTagFromWait();
// Record the CSR's current task count together with its tag address (stored as an integer).
csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
}
void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const {
@@ -54,6 +53,7 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
if (productHelper.isDcFlushAllowed()) {
if (!dependentCsr.isLatestTaskCountFlushed()) {
flushDependentCsr(dependentCsr, csrDeps);
//csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
currentCsr.makeResident(*dependentCsr.getTagAllocation());
}
}
@@ -62,23 +62,22 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
}
}
void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
// Collects cross-root-device dependencies from this request's event wait list.
//   csrDeps    - dependency record that is appended to.
//   currentCsr - command stream receiver that will wait on the dependencies; its
//                root device index decides which events count as "other device".
// NOTE(review): this listing looks like a diff rendered without +/- markers, so lines
// from the old task-count based version and the new tag-node based version may be
// interleaved here — confirm against the actual file.
void EventsRequest::fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
// User events and events whose task count is still CompletionStamp::notReady are skipped.
if (event->isUserEvent() || CompletionStamp::notReady == event->peekTaskCount()) {
continue;
}
// Only events that have a command queue on a different root device than currentCsr are handled.
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
auto timestampPacketContainer = event->getMultiRootDeviceTimestampPacketNodes();
// Nothing to synchronize on when the event has no multi-root-device nodes.
if (!timestampPacketContainer || timestampPacketContainer->peekNodes().empty()) {
continue;
}
auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver();
// Unflushed CSR: go through flushDependentCsr; otherwise record the event's task count
// with the CSR's tag address directly.
if (!dependentCsr.isLatestTaskCountFlushed()) {
flushDependentCsr(dependentCsr, csrDeps);
} else {
csrDeps.taskCountContainer.push_back({event->peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
}
// Add the dependent CSR's tag allocation for currentCsr's root device to currentCsr's residency list.
auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
// Register the event's multi-root-device timestamp nodes as a dependency.
csrDeps.multiRootTimeStampSyncContainer.push_back(timestampPacketContainer);
}
}
}