mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 00:24:58 +08:00
Revert "fix: add cache flush as dependency for bcs ccs synchronization"
This reverts commit 5e57bb2a32.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c57534eb67
commit
8342c0ae2f
@@ -580,7 +580,6 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
|
|||||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.cacheFlushNodes);
|
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.cacheFlushNodes);
|
||||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
||||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.barrierNodes);
|
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.barrierNodes);
|
||||||
blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies.multiCsrDependencies);
|
|
||||||
}
|
}
|
||||||
blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync;
|
blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync;
|
||||||
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
|
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
|
||||||
@@ -1438,14 +1437,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
|
|||||||
if (isCacheFlushForBcsRequired() && gpgpuSubmission) {
|
if (isCacheFlushForBcsRequired() && gpgpuSubmission) {
|
||||||
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
|
||||||
}
|
}
|
||||||
for (auto &dependentCsr : csrDeps.csrWithMultiEngineDependencies) {
|
|
||||||
auto tag = allocator->getTag();
|
|
||||||
timestampPacketDependencies.multiCsrDependencies.add(tag);
|
|
||||||
bool submitStatus = dependentCsr->submitDependencyUpdate(tag);
|
|
||||||
if (!submitStatus) {
|
|
||||||
return CL_OUT_OF_RESOURCES;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
obtainNewTimestampPacketNodes(1, timestampPacketDependencies.previousEnqueueNodes, clearAllDependencies, bcsCsr);
|
||||||
csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
csrDeps.timestampPacketContainer.push_back(&timestampPacketDependencies.previousEnqueueNodes);
|
||||||
|
|
||||||
|
|||||||
@@ -58,14 +58,13 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
|
|||||||
if (pushDependency) {
|
if (pushDependency) {
|
||||||
csrDeps.timestampPacketContainer.push_back(timestampPacketContainer);
|
csrDeps.timestampPacketContainer.push_back(timestampPacketContainer);
|
||||||
|
|
||||||
const auto &productHelper = event->getCommandQueue()->getDevice().getProductHelper();
|
|
||||||
if (!sameCsr) {
|
if (!sameCsr) {
|
||||||
|
const auto &productHelper = event->getCommandQueue()->getDevice().getProductHelper();
|
||||||
if (productHelper.isDcFlushAllowed()) {
|
if (productHelper.isDcFlushAllowed()) {
|
||||||
if (!dependentCsr->isLatestTaskCountFlushed()) {
|
if (!dependentCsr->isLatestTaskCountFlushed()) {
|
||||||
flushDependentCsr(*dependentCsr, csrDeps);
|
flushDependentCsr(*dependentCsr, csrDeps);
|
||||||
currentCsr.makeResident(*dependentCsr->getTagAllocation());
|
currentCsr.makeResident(*dependentCsr->getTagAllocation());
|
||||||
}
|
}
|
||||||
csrDeps.csrWithMultiEngineDependencies.insert(dependentCsr);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1373,11 +1373,11 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEventWhenWaitingForCom
|
|||||||
userEvent.setStatus(CL_COMPLETE);
|
userEvent.setStatus(CL_COMPLETE);
|
||||||
|
|
||||||
clWaitForEvents(1, &outEvent2);
|
clWaitForEvents(1, &outEvent2);
|
||||||
EXPECT_EQ(3u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||||
EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||||
|
|
||||||
clWaitForEvents(1, &outEvent1);
|
clWaitForEvents(1, &outEvent1);
|
||||||
EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||||
EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load());
|
||||||
|
|
||||||
clReleaseEvent(outEvent1);
|
clReleaseEvent(outEvent1);
|
||||||
|
|||||||
@@ -66,7 +66,6 @@ class TimestampPackets;
|
|||||||
|
|
||||||
template <typename T1>
|
template <typename T1>
|
||||||
class TagAllocator;
|
class TagAllocator;
|
||||||
class TagNodeBase;
|
|
||||||
|
|
||||||
enum class DispatchMode {
|
enum class DispatchMode {
|
||||||
deviceDefault = 0, // default for given device
|
deviceDefault = 0, // default for given device
|
||||||
@@ -476,7 +475,6 @@ class CommandStreamReceiver {
|
|||||||
|
|
||||||
uint32_t getRequiredScratchSlot0Size() { return requiredScratchSlot0Size; }
|
uint32_t getRequiredScratchSlot0Size() { return requiredScratchSlot0Size; }
|
||||||
uint32_t getRequiredScratchSlot1Size() { return requiredScratchSlot1Size; }
|
uint32_t getRequiredScratchSlot1Size() { return requiredScratchSlot1Size; }
|
||||||
virtual bool submitDependencyUpdate(TagNodeBase *tag) { return true; }
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void cleanupResources();
|
void cleanupResources();
|
||||||
|
|||||||
@@ -199,7 +199,6 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||||||
void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream);
|
void programStateBaseAddressHeapless(Device &device, LinearStream &commandStream);
|
||||||
void programComputeModeHeapless(Device &device, LinearStream &commandStream);
|
void programComputeModeHeapless(Device &device, LinearStream &commandStream);
|
||||||
void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh);
|
void handleAllocationsResidencyForflushTaskStateless(const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh);
|
||||||
bool submitDependencyUpdate(TagNodeBase *tag) override;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
|
void programPreemption(LinearStream &csr, DispatchFlags &dispatchFlags);
|
||||||
|
|||||||
@@ -2337,25 +2337,4 @@ inline void CommandStreamReceiverHw<GfxFamily>::chainCsrWorkToTask(LinearStream
|
|||||||
this->makeResident(*chainedBatchBuffer);
|
this->makeResident(*chainedBatchBuffer);
|
||||||
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
|
EncodeNoop<GfxFamily>::alignToCacheLine(commandStreamCSR);
|
||||||
}
|
}
|
||||||
template <typename GfxFamily>
|
|
||||||
bool CommandStreamReceiverHw<GfxFamily>::submitDependencyUpdate(TagNodeBase *tag) {
|
|
||||||
auto ownership = obtainUniqueOwnership();
|
|
||||||
auto expectedSize = sizeof(typename GfxFamily::PIPE_CONTROL);
|
|
||||||
auto &commandStream = getCS(expectedSize);
|
|
||||||
auto commandStreamStart = commandStream.getUsed();
|
|
||||||
auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tag);
|
|
||||||
PipeControlArgs args;
|
|
||||||
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, this->peekRootDeviceEnvironment());
|
|
||||||
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
|
||||||
commandStream,
|
|
||||||
PostSyncMode::immediateData,
|
|
||||||
cacheFlushTimestampPacketGpuAddress,
|
|
||||||
0,
|
|
||||||
this->peekRootDeviceEnvironment(),
|
|
||||||
args);
|
|
||||||
|
|
||||||
auto submissionStatus = this->flushSmallTask(commandStream, commandStreamStart);
|
|
||||||
this->latestFlushedTaskCount = taskCount.load();
|
|
||||||
return submissionStatus == SubmissionStatus::success;
|
|
||||||
}
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -8,8 +8,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
#include "shared/source/utilities/stackvec.h"
|
#include "shared/source/utilities/stackvec.h"
|
||||||
|
|
||||||
#include <set>
|
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|
||||||
class TimestampPacketContainer;
|
class TimestampPacketContainer;
|
||||||
@@ -29,7 +27,5 @@ class CsrDependencies {
|
|||||||
void makeResident(CommandStreamReceiver &commandStreamReceiver) const;
|
void makeResident(CommandStreamReceiver &commandStreamReceiver) const;
|
||||||
void copyNodesToNewContainer(TimestampPacketContainer &newTimestampPacketContainer);
|
void copyNodesToNewContainer(TimestampPacketContainer &newTimestampPacketContainer);
|
||||||
void copyRootDeviceSyncNodesToNewContainer(TimestampPacketContainer &newTimestampPacketContainer);
|
void copyRootDeviceSyncNodesToNewContainer(TimestampPacketContainer &newTimestampPacketContainer);
|
||||||
|
|
||||||
std::set<CommandStreamReceiver *> csrWithMultiEngineDependencies;
|
|
||||||
};
|
};
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -39,7 +39,6 @@ struct TimestampPacketDependencies : public NonCopyableClass {
|
|||||||
TimestampPacketContainer barrierNodes;
|
TimestampPacketContainer barrierNodes;
|
||||||
TimestampPacketContainer auxToNonAuxNodes;
|
TimestampPacketContainer auxToNonAuxNodes;
|
||||||
TimestampPacketContainer nonAuxToAuxNodes;
|
TimestampPacketContainer nonAuxToAuxNodes;
|
||||||
TimestampPacketContainer multiCsrDependencies;
|
|
||||||
|
|
||||||
void moveNodesToNewContainer(TimestampPacketContainer &timestampPacketContainer);
|
void moveNodesToNewContainer(TimestampPacketContainer &timestampPacketContainer);
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user