Make implicit flush for cross-device dependency
Related-To: NEO-6418

If there's a cross-device dependency, flush batched submissions to avoid deadlock.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
parent
9d31d36491
commit
4266f861ac
|
@ -17,6 +17,12 @@
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|
||||||
|
void flushDependentCsr(CommandStreamReceiver &dependentCsr, CsrDependencies &csrDeps) {
|
||||||
|
auto csrOwnership = dependentCsr.obtainUniqueOwnership();
|
||||||
|
dependentCsr.updateTagFromWait();
|
||||||
|
csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
|
||||||
|
}
|
||||||
|
|
||||||
void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const {
|
void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependencies &csrDeps, CommandStreamReceiver &currentCsr, CsrDependencies::DependenciesType depsType) const {
|
||||||
for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
|
for (cl_uint i = 0; i < this->numEventsInWaitList; i++) {
|
||||||
auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
|
auto event = castToObjectOrAbort<Event>(this->eventWaitList[i]);
|
||||||
|
@ -47,10 +53,7 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
|
||||||
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(event->getCommandQueue()->getDevice().getHardwareInfo().platform.eProductFamily);
|
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(event->getCommandQueue()->getDevice().getHardwareInfo().platform.eProductFamily);
|
||||||
if (hwInfoConfig.isDcFlushAllowed()) {
|
if (hwInfoConfig.isDcFlushAllowed()) {
|
||||||
if (!dependentCsr.isLatestTaskCountFlushed()) {
|
if (!dependentCsr.isLatestTaskCountFlushed()) {
|
||||||
auto csrOwnership = dependentCsr.obtainUniqueOwnership();
|
flushDependentCsr(dependentCsr, csrDeps);
|
||||||
dependentCsr.flushBatchedSubmissions();
|
|
||||||
dependentCsr.updateTagFromWait();
|
|
||||||
csrDeps.taskCountContainer.push_back({dependentCsr.peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
|
|
||||||
currentCsr.makeResident(*dependentCsr.getTagAllocation());
|
currentCsr.makeResident(*dependentCsr.getTagAllocation());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -67,10 +70,12 @@ void EventsRequest::fillCsrDependenciesForTaskCountContainer(CsrDependencies &cs
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
|
if (event->getCommandQueue() && event->getCommandQueue()->getDevice().getRootDeviceIndex() != currentCsr.getRootDeviceIndex()) {
|
||||||
auto taskCountPreviousRootDevice = event->peekTaskCount();
|
auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver();
|
||||||
auto tagAddressPreviousRootDevice = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagAddress();
|
if (!dependentCsr.isLatestTaskCountFlushed()) {
|
||||||
|
flushDependentCsr(dependentCsr, csrDeps);
|
||||||
csrDeps.taskCountContainer.push_back({taskCountPreviousRootDevice, reinterpret_cast<uint64_t>(tagAddressPreviousRootDevice)});
|
} else {
|
||||||
|
csrDeps.taskCountContainer.push_back({event->peekTaskCount(), reinterpret_cast<uint64_t>(dependentCsr.getTagAddress())});
|
||||||
|
}
|
||||||
|
|
||||||
auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
|
auto graphicsAllocation = event->getCommandQueue()->getGpgpuCommandStreamReceiver().getTagsMultiAllocation()->getGraphicsAllocation(currentCsr.getRootDeviceIndex());
|
||||||
currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
|
currentCsr.getResidencyAllocations().push_back(graphicsAllocation);
|
||||||
|
|
|
@ -260,6 +260,7 @@ struct CrossDeviceDependenciesTests : public ::testing::Test {
|
||||||
defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
|
defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
|
||||||
deviceFactory = std::make_unique<UltClDeviceFactory>(3, 0);
|
deviceFactory = std::make_unique<UltClDeviceFactory>(3, 0);
|
||||||
auto device1 = deviceFactory->rootDevices[1];
|
auto device1 = deviceFactory->rootDevices[1];
|
||||||
|
|
||||||
auto device2 = deviceFactory->rootDevices[2];
|
auto device2 = deviceFactory->rootDevices[2];
|
||||||
|
|
||||||
cl_device_id devices[] = {device1, device2};
|
cl_device_id devices[] = {device1, device2};
|
||||||
|
@ -633,6 +634,43 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
|
||||||
pCmdQ2->release();
|
pCmdQ2->release();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(MultiRootDeviceCommandStreamReceiverTests, givenUnflushedQueueAndEventInMultiRootDeviceEnvironmentWhenTheyArePassedToSecondQueueThenFlushSubmissions) {
|
||||||
|
auto deviceFactory = std::make_unique<UltClDeviceFactory>(3, 0);
|
||||||
|
deviceFactory->rootDevices[1]->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||||
|
deviceFactory->rootDevices[1]->getUltCommandStreamReceiver<FamilyType>().useNewResourceImplicitFlush = false;
|
||||||
|
|
||||||
|
cl_device_id devices[] = {deviceFactory->rootDevices[1], deviceFactory->rootDevices[2]};
|
||||||
|
|
||||||
|
auto context = std::make_unique<MockContext>(ClDeviceVector(devices, 2), false);
|
||||||
|
auto pCmdQ1 = context.get()->getSpecialQueue(1u);
|
||||||
|
auto pCmdQ2 = context.get()->getSpecialQueue(2u);
|
||||||
|
|
||||||
|
pCmdQ1->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||||
|
cl_event outputEvent{};
|
||||||
|
cl_event inputEvent;
|
||||||
|
|
||||||
|
pCmdQ1->enqueueMarkerWithWaitList(
|
||||||
|
0,
|
||||||
|
nullptr,
|
||||||
|
&inputEvent);
|
||||||
|
pCmdQ1->enqueueMarkerWithWaitList(
|
||||||
|
1,
|
||||||
|
&inputEvent,
|
||||||
|
&outputEvent);
|
||||||
|
|
||||||
|
EXPECT_FALSE(pCmdQ1->getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed());
|
||||||
|
|
||||||
|
pCmdQ2->enqueueMarkerWithWaitList(
|
||||||
|
1,
|
||||||
|
&outputEvent,
|
||||||
|
nullptr);
|
||||||
|
EXPECT_TRUE(pCmdQ1->getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed());
|
||||||
|
castToObject<Event>(inputEvent)->release();
|
||||||
|
castToObject<Event>(outputEvent)->release();
|
||||||
|
pCmdQ1->finish();
|
||||||
|
pCmdQ2->finish();
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStaticPartitioningEnabledWhenFlushingTaskThenWorkPartitionAllocationIsMadeResident) {
|
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStaticPartitioningEnabledWhenFlushingTaskThenWorkPartitionAllocationIsMadeResident) {
|
||||||
DebugManagerStateRestore restore{};
|
DebugManagerStateRestore restore{};
|
||||||
DebugManager.flags.EnableStaticPartitioning.set(1);
|
DebugManager.flags.EnableStaticPartitioning.set(1);
|
||||||
|
|
|
@ -1261,6 +1261,7 @@ inline bool CommandStreamReceiverHw<GfxFamily>::isUpdateTagFromWaitEnabled() {
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
inline void CommandStreamReceiverHw<GfxFamily>::updateTagFromWait() {
|
inline void CommandStreamReceiverHw<GfxFamily>::updateTagFromWait() {
|
||||||
|
flushBatchedSubmissions();
|
||||||
if (isUpdateTagFromWaitEnabled()) {
|
if (isUpdateTagFromWaitEnabled()) {
|
||||||
flushTagUpdate();
|
flushTagUpdate();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2018-2021 Intel Corporation
|
* Copyright (C) 2018-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
|
|
@ -26,10 +26,7 @@ bool DeferrableAllocationDeletion::apply() {
|
||||||
graphicsAllocation.releaseUsageInOsContext(contextId);
|
graphicsAllocation.releaseUsageInOsContext(contextId);
|
||||||
} else {
|
} else {
|
||||||
isStillUsed = true;
|
isStillUsed = true;
|
||||||
engine.commandStreamReceiver->flushBatchedSubmissions();
|
engine.commandStreamReceiver->updateTagFromWait();
|
||||||
if (engine.commandStreamReceiver->peekLatestFlushedTaskCount() < graphicsAllocation.getTaskCount(contextId)) {
|
|
||||||
engine.commandStreamReceiver->updateTagFromWait();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue