Revert "fix: add cache flush as dependency for bcs ccs synchronization"

This reverts commit 5e57bb2a32.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2024-05-01 02:32:37 +02:00
committed by Compute-Runtime-Automation
parent c57534eb67
commit 8342c0ae2f
8 changed files with 4 additions and 42 deletions

View File

@@ -580,7 +580,6 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.cacheFlushNodes);
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.barrierNodes);
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.multiCsrDependencies);
}
blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync;
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
@@ -1438,14 +1437,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
if (isCacheFlushForBcsRequired() && gpgpuSubmission) {
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
}
for (auto &dependentCsr : csrDeps.csrWithMultiEngineDependencies) {
auto tag = allocator->getTag();
timestampPacketDependencies.multiCsrDependencies.add(tag);
bool submitStatus = dependentCsr->submitDependencyUpdate(tag);
if (!submitStatus) {
return CL_OUT_OF_RESOURCES;
}
}
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);

View File

@@ -58,14 +58,13 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
if (pushDependency) {
csrDeps.timestampPacketContainer.push_back(timestampPacketContainer);
const auto &productHelper = event->getCommandQueue()->getDevice().getProductHelper();
if (!sameCsr) {
const auto &productHelper = event->getCommandQueue()->getDevice().getProductHelper();
if (productHelper.isDcFlushAllowed()) {
if (!dependentCsr->isLatestTaskCountFlushed()) {
flushDependentCsr(*dependentCsr, csrDeps);
currentCsr.makeResident(*dependentCsr->getTagAllocation());
}
csrDeps.csrWithMultiEngineDependencies.insert(dependentCsr);
}
}
}

View File

@@ -1373,11 +1373,11 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEventWhenWaitingForCom
userEvent.setStatus(CL_COMPLETE);
clWaitForEvents(1, &outEvent2);
EXPECT_EQ(3u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
clWaitForEvents(1, &outEvent1);
EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
clReleaseEvent(outEvent1);

View File

@@ -66,7 +66,6 @@ class TimestampPackets;
template <typename T1>
class TagAllocator;
class TagNodeBase;
enum class DispatchMode {
deviceDefault = 0, // default for given device
@@ -476,7 +475,6 @@ class CommandStreamReceiver {
uint32_t getRequiredScratchSlot0Size() { return requiredScratchSlot0Size; }
uint32_t getRequiredScratchSlot1Size() { return requiredScratchSlot1Size; }
virtual bool submitDependencyUpdate(TagNodeBase *tag) { return true; }
protected:
void cleanupResources();

View File

@@ -199,7 +199,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream);
void programComputeModeHeapless(Device &device, LinearStream &commandStream);
void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh);
bool submitDependencyUpdate(TagNodeBase *tag) override;
protected:
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);

View File

@@ -2337,25 +2337,4 @@ inline void CommandStreamReceiverHw<GfxFamily>::chainCsrWorkToTask(LinearStream
this->makeResident(*chainedBatchBuffer);
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
}
/// Emits a barrier with a post-sync immediate-data write targeting the
/// context-end GPU address of `tag`, then flushes it as a small task.
///
/// @param tag  Timestamp-packet node whose context-end address receives the
///             post-sync write (immediate value 0). Dereferenced without a
///             null check, so the caller must pass a valid node.
/// @return true when flushSmallTask reports SubmissionStatus::success,
///         false for any other status.
template <typename GfxFamily>
bool CommandStreamReceiverHw<GfxFamily>::submitDependencyUpdate(TagNodeBase *tag) {
    // Keep unique ownership of this receiver until the function returns.
    auto csrLock = obtainUniqueOwnership();

    // Space is requested for exactly one PIPE_CONTROL command.
    const auto requiredSpace = sizeof(typename GfxFamily::PIPE_CONTROL);
    auto &stream = getCS(requiredSpace);

    // Offset at which the commands written below begin; handed to the flush.
    auto streamStartOffset = stream.getUsed();

    auto postSyncGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tag);

    PipeControlArgs barrierArgs;
    barrierArgs.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, this->peekRootDeviceEnvironment());

    MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
        stream,
        PostSyncMode::immediateData,
        postSyncGpuAddress,
        0,
        this->peekRootDeviceEnvironment(),
        barrierArgs);

    auto flushStatus = this->flushSmallTask(stream, streamStartOffset);

    // The flushed task count is refreshed regardless of the flush outcome.
    this->latestFlushedTaskCount = taskCount.load();

    const bool submitted = (flushStatus == SubmissionStatus::success);
    return submitted;
}
} // namespace NEO

View File

@@ -8,8 +8,6 @@
#pragma once
#include "shared/source/utilities/stackvec.h"
#include <set>
namespace NEO {
class TimestampPacketContainer;
@@ -29,7 +27,5 @@ class CsrDependencies {
void makeResident(CommandStreamReceiver &commandStreamReceiver) const;
void copyNodesToNewContainer(TimestampPacketContainer &newTimestampPacketContainer);
void copyRootDeviceSyncNodesToNewContainer(TimestampPacketContainer &newTimestampPacketContainer);
std::set<CommandStreamReceiver *> csrWithMultiEngineDependencies;
};
} // namespace NEO

View File

@@ -39,7 +39,6 @@ struct TimestampPacketDependencies : public NonCopyableClass {
TimestampPacketContainer barrierNodes;
TimestampPacketContainer auxToNonAuxNodes;
TimestampPacketContainer nonAuxToAuxNodes;
TimestampPacketContainer multiCsrDependencies;
void moveNodesToNewContainer(TimestampPacketContainer &timestampPacketContainer);
};