Mirror of https://github.com/intel/compute-runtime.git
Feature(OCL) Use tag nodes for root device synchronization
With this commit, events created on multi root device contexts will synchronize using signaled TagNodes instead of taskCounts.

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
Related-To: NEO-7105
Committed by: Compute-Runtime-Automation
Parent: 6fac234655
Commit: 547d1c37b3
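For context, a minimal host-side sketch of the scenario this commit targets (illustration only, not part of the commit): one OpenCL context spans two root devices, and a queue on the second device waits on an event produced by a queue on the first. It assumes the Intel OpenCL platform exposes at least two GPU root devices; the buffer size, fill pattern, and choice of enqueue calls are arbitrary, and error checking is omitted for brevity.

#include <CL/cl.h>
#include <vector>

int main() {
    cl_platform_id platform{};
    clGetPlatformIDs(1, &platform, nullptr);

    cl_device_id devices[2]{};
    cl_uint numDevices = 0;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 2, devices, &numDevices);
    if (numDevices < 2) {
        return 0; // no multi root device scenario available on this machine
    }

    // A context spanning both GPUs is a multi root device context in NEO terms.
    cl_int err = CL_SUCCESS;
    cl_context context = clCreateContext(nullptr, 2, devices, nullptr, nullptr, &err);
    cl_command_queue queue0 = clCreateCommandQueueWithProperties(context, devices[0], nullptr, &err);
    cl_command_queue queue1 = clCreateCommandQueueWithProperties(context, devices[1], nullptr, &err);
    cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, 4096, nullptr, &err);

    // Producer on root device 0 signals an event.
    const cl_uint pattern = 0xDEADBEEF;
    cl_event producedOnDevice0{};
    clEnqueueFillBuffer(queue0, buffer, &pattern, sizeof(pattern), 0, 4096, 0, nullptr, &producedOnDevice0);

    // Consumer on root device 1 waits on that event. This cross root device
    // dependency is the case that, with this commit, is resolved through
    // signaled TagNodes (see fillCsrDependenciesForRootDevices below) rather
    // than through task counts.
    std::vector<cl_uint> hostData(1024);
    clEnqueueReadBuffer(queue1, buffer, CL_TRUE, 0, 4096, hostData.data(), 1, &producedOnDevice0, nullptr);

    clReleaseEvent(producedOnDevice0);
    clReleaseMemObject(buffer);
    clReleaseCommandQueue(queue1);
    clReleaseCommandQueue(queue0);
    clReleaseContext(context);
    return 0;
}
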
@@ -20,7 +20,6 @@ namespace NEO {
void flushDependentCsr(CommandStreamReceiver &dependentCsr, CsrDependencies &csrDeps) {
    auto csrOwnership = dependentCsr.obtainUniqueOwnership();
    dependentCsr.updateTagFromWait();
-   csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
}

void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const {
@@ -54,6 +53,7 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
        if (productHelper.isDcFlushAllowed()) {
            if (!dependentCsr.isLatestTaskCountFlushed()) {
                flushDependentCsr(dependentCsr, csrDeps);
+               //csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
                currentCsr.makeResident(*dependentCsr.getTagAllocation());
            }
        }
@@ -62,23 +62,22 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
    }
}

-void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
+void EventsRequest::fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const {
    for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
        auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
        if (event->isUserEvent() || CompletionStamp::notReady == event->peekTaskCount()) {
            continue;
        }

        if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
            auto timestampPacketContainer = event->getMultiRootDeviceTimestampPacketNodes();
            if (!timestampPacketContainer || timestampPacketContainer->peekNodes().empty()) {
                continue;
            }
            auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver();
            if (!dependentCsr.isLatestTaskCountFlushed()) {
                flushDependentCsr(dependentCsr, csrDeps);
            } else {
                csrDeps.taskCountContainer.push_back({event->peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
            }

            auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
            currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
            csrDeps.multiRootTimeStampSyncContainer.push_back(timestampPacketContainer);
        }
    }
}

@@ -25,7 +25,7 @@ struct EventsRequest {
        : numEventsInWaitList(numEventsInWaitList), eventWaitList(eventWaitList), outEvent(outEvent) {}

    void fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const;
-   void fillCsrDependenciesForTaskCountContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const;
+   void fillCsrDependenciesForRootDevices(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr) const;
    void setupBcsCsrForOutputEvent(CommandStreamReceiver &bcsCsr) const;

    cl_uint numEventsInWaitList;

@@ -117,10 +117,11 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate

CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
                                           bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
-                                          PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
+                                          PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount,
+                                          TagNodeBase *multiRootDeviceSyncNode)
    : Command(commandQueue, kernelOperation), surfaces(std::move(surfaces)), flushDC(flushDC), slmUsed(usesSLM),
      commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel),
-     kernelCount(kernelCount), preemptionMode(preemptionMode) {
+     kernelCount(kernelCount), preemptionMode(preemptionMode), multiRootDeviceSyncNode(multiRootDeviceSyncNode) {
    UNRECOVERABLE_IF(nullptr == this->kernel);
    kernel->incRefInternal();
}
@@ -162,6 +163,9 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
        printfHandler->makeResident(commandStreamReceiver);
    }
    makeTimestampPacketsResident(commandStreamReceiver);
+   if (multiRootDeviceSyncNode != nullptr) {
+       commandStreamReceiver.makeResident(*multiRootDeviceSyncNode->getBaseGraphicsAllocation());
+   }

    if (kernelOperation->blitPropertiesContainer.size() > 0) {
        CsrDependencies csrDeps;
@@ -213,7 +217,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
        false); // hasRelaxedOrderingDependencies

    if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
-       eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
+       eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
    }

    const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
@@ -306,7 +310,7 @@ TaskCountType CommandWithoutKernel::dispatchBlitOperation() {
    blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0];

    if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
-       eventsRequest.fillCsrDependenciesForTaskCountContainer(blitProperties.csrDependencies, *bcsCsr);
+       eventsRequest.fillCsrDependenciesForRootDevices(blitProperties.csrDependencies, *bcsCsr);
    }

    const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
@@ -388,7 +392,7 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
        false); // hasRelaxedOrderingDependencies

    if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
-       eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
+       eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
    }

    const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();

@@ -131,7 +131,7 @@ class CommandComputeKernel : public Command {
  public:
    CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> surfaces,
                         bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
-                        PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount);
+                        PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount, TagNodeBase *multiRootDeviceSyncNode);

    ~CommandComputeKernel() override;

@@ -150,6 +150,7 @@ class CommandComputeKernel : public Command {
    Kernel *kernel;
    uint32_t kernelCount;
    PreemptionMode preemptionMode;
+   TagNodeBase *multiRootDeviceSyncNode;
};

class CommandWithoutKernel : public Command {