mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Revert "performance(ocl): program barrier pc in taskStream"
This reverts commit 839c2d6737.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0a28d622fa
commit
b5e9c10f64
@@ -348,12 +348,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
void fillCsrDependenciesWithLastBcsPackets(CsrDependencies &csrDeps);
|
||||
void clearLastBcsPackets();
|
||||
|
||||
void setStallingCommandsOnNextFlush(const bool isStallingCommandsOnNextFlushRequired) {
|
||||
stallingCommandsOnNextFlushRequired = isStallingCommandsOnNextFlushRequired;
|
||||
if (!isStallingCommandsOnNextFlushRequired) {
|
||||
dcFlushRequiredOnStallingCommandsOnNextFlush = false;
|
||||
}
|
||||
}
|
||||
void setStallingCommandsOnNextFlush(const bool isStallingCommandsOnNextFlushRequired) { stallingCommandsOnNextFlushRequired = isStallingCommandsOnNextFlushRequired; }
|
||||
bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; }
|
||||
|
||||
void setDcFlushRequiredOnStallingCommandsOnNextFlush(const bool isDcFlushRequiredOnStallingCommandsOnNextFlush) { dcFlushRequiredOnStallingCommandsOnNextFlush = isDcFlushRequiredOnStallingCommandsOnNextFlush; }
|
||||
|
||||
@@ -272,10 +272,11 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
} else if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
if (CL_COMMAND_BARRIER == commandType && !isNonStallingIoqBarrier) {
|
||||
setStallingCommandsOnNextFlush(true);
|
||||
const bool isDcFlushRequiredOnBarrier = NEO::DebugManager.flags.SkipDcFlushOnBarrierWithoutEvents.get() == 0 || event;
|
||||
setDcFlushRequiredOnStallingCommandsOnNextFlush(isDcFlushRequiredOnBarrier);
|
||||
if (NEO::DebugManager.flags.SkipDcFlushOnBarrierWithoutEvents.get() == 0 || event) {
|
||||
setDcFlushRequiredOnStallingCommandsOnNextFlush(true);
|
||||
}
|
||||
this->splitBarrierRequired = true;
|
||||
}
|
||||
this->splitBarrierRequired = true;
|
||||
|
||||
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
|
||||
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
|
||||
@@ -777,18 +778,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
|
||||
UNRECOVERABLE_IF(multiDispatchInfo.empty());
|
||||
|
||||
if (!relaxedOrderingEnabled && !getGpgpuCommandStreamReceiver().isMultiTileOperationEnabled() && isStallingCommandsOnNextFlushRequired() && !isBlitAuxTranslationRequired(multiDispatchInfo)) {
|
||||
CsrDependencies csrDeps{};
|
||||
fillCsrDependenciesWithLastBcsPackets(csrDeps);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps, false);
|
||||
|
||||
setupBarrierTimestampForBcsEngines(getGpgpuCommandStreamReceiver().getOsContext().getEngineType(), timestampPacketDependencies);
|
||||
getGpgpuCommandStreamReceiver().programStallingCommandsForBarrier(commandStream, &timestampPacketDependencies.barrierNodes, isDcFlushRequiredOnStallingCommandsOnNextFlush());
|
||||
|
||||
clearLastBcsPackets();
|
||||
setStallingCommandsOnNextFlush(false);
|
||||
}
|
||||
|
||||
auto implicitFlush = false;
|
||||
|
||||
if (printfHandler) {
|
||||
@@ -975,6 +964,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
if (isHandlingBarrier) {
|
||||
clearLastBcsPackets();
|
||||
setStallingCommandsOnNextFlush(false);
|
||||
setDcFlushRequiredOnStallingCommandsOnNextFlush(false);
|
||||
}
|
||||
|
||||
if (gtpinIsGTPinInitialized()) {
|
||||
@@ -1195,6 +1185,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
if (isHandlingBarrier) {
|
||||
clearLastBcsPackets();
|
||||
setStallingCommandsOnNextFlush(false);
|
||||
setDcFlushRequiredOnStallingCommandsOnNextFlush(false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -273,6 +273,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
|
||||
if (isHandlingBarrier) {
|
||||
commandQueue.clearLastBcsPackets();
|
||||
commandQueue.setStallingCommandsOnNextFlush(false);
|
||||
commandQueue.setDcFlushRequiredOnStallingCommandsOnNextFlush(false);
|
||||
}
|
||||
|
||||
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
||||
@@ -433,6 +434,7 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
|
||||
if (isHandlingBarrier) {
|
||||
commandQueue.clearLastBcsPackets();
|
||||
commandQueue.setStallingCommandsOnNextFlush(false);
|
||||
commandQueue.setDcFlushRequiredOnStallingCommandsOnNextFlush(false);
|
||||
}
|
||||
|
||||
if (kernelOperation->blitEnqueue) {
|
||||
|
||||
@@ -1186,20 +1186,23 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandT
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
uint64_t lastBlitNodeAddress = TimestampPacketHelper::getContextEndGpuAddress(*pCmdQ->getTimestampPacketContainer()->peekNodes()[0]);
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
|
||||
auto bcsStart = pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0).getUsed();
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
uint64_t barrierNodeAddress = 0u;
|
||||
{
|
||||
HardwareParse queueHwParser;
|
||||
queueHwParser.parseCommands<FamilyType>(*pDevice->getUltCommandStreamReceiver<FamilyType>().lastFlushedCommandStream, 0);
|
||||
HardwareParse ccsHwParser;
|
||||
ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);
|
||||
|
||||
const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(queueHwParser.cmdList.begin(), queueHwParser.cmdList.end());
|
||||
const auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
|
||||
const auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||
EXPECT_EQ(lastBlitNodeAddress, semaphore->getSemaphoreGraphicsAddress());
|
||||
|
||||
const auto pipeControlItor = find<PIPE_CONTROL *>(semaphoreItor, queueHwParser.cmdList.end());
|
||||
const auto pipeControlItor = find<PIPE_CONTROL *>(semaphoreItor, ccsHwParser.cmdList.end());
|
||||
const auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlItor);
|
||||
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControl->getPostSyncOperation());
|
||||
barrierNodeAddress = pipeControl->getAddress() | (static_cast<uint64_t>(pipeControl->getAddressHigh()) << 32);
|
||||
@@ -1208,9 +1211,6 @@ HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitBeforeBarrierWhenEnqueueingCommandT
|
||||
EXPECT_EQ(pipeControlItor, find<MI_SEMAPHORE_WAIT *>(std::next(semaphoreItor), pipeControlItor));
|
||||
}
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
{
|
||||
HardwareParse bcsHwParser;
|
||||
bcsHwParser.parseCommands<FamilyType>(pCmdQ->getBcsCommandStreamReceiver(aub_stream::ENGINE_BCS)->getCS(0), bcsStart);
|
||||
|
||||
@@ -820,18 +820,16 @@ HWTEST2_F(OoqCommandQueueHwBlitTest, givenBarrierBeforeFirstKernelWhenEnqueueNDR
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueReadBuffer(buffer.get(), CL_FALSE, 0, 1u, ptr, nullptr, 0, nullptr, nullptr));
|
||||
auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr));
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
|
||||
auto ccsStart = pCmdQ->getGpgpuCommandStreamReceiver().getCS().getUsed();
|
||||
|
||||
HardwareParse queueHwParser;
|
||||
queueHwParser.parseCommands<FamilyType>(*pDevice->getUltCommandStreamReceiver<FamilyType>().lastFlushedCommandStream, 0u);
|
||||
const auto memFenceItor = find<MI_MEM_FENCE *>(queueHwParser.cmdList.begin(), queueHwParser.cmdList.end());
|
||||
EXPECT_NE(queueHwParser.cmdList.end(), memFenceItor);
|
||||
EXPECT_EQ(CL_SUCCESS, pCmdQ->enqueueKernel(kernel, 1, &offset, &gws, nullptr, 0, nullptr, nullptr));
|
||||
|
||||
HardwareParse ccsHwParser;
|
||||
ccsHwParser.parseCommands<FamilyType>(pCmdQ->getGpgpuCommandStreamReceiver().getCS(0), ccsStart);
|
||||
|
||||
const auto memFenceStateItor = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(ccsHwParser.cmdList.begin(), ccsHwParser.cmdList.end());
|
||||
const auto memFenceItor = find<MI_MEM_FENCE *>(memFenceStateItor, ccsHwParser.cmdList.end());
|
||||
EXPECT_NE(ccsHwParser.cmdList.end(), memFenceItor);
|
||||
EXPECT_NE(ccsHwParser.cmdList.end(), memFenceStateItor);
|
||||
}
|
||||
|
||||
@@ -26,9 +26,7 @@ using BarrierTest = Test<CommandEnqueueFixture>;
|
||||
HWTEST_F(BarrierTest, givenCsrWithHigherLevelThenCommandQueueWhenEnqueueBarrierIsCalledThenCommandQueueAlignsToCsrWithoutSendingAnyCommands) {
|
||||
auto pCmdQ = this->pCmdQ;
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
// Set task levels to known values.
|
||||
uint32_t originalCSRLevel = 2;
|
||||
commandStreamReceiver.taskLevel = originalCSRLevel;
|
||||
@@ -71,9 +69,7 @@ HWTEST_F(BarrierTest, GivenCsrTaskLevelGreaterThenCmdqTaskLevelWhenEnqueingBarri
|
||||
auto pCmdQ = this->pCmdQ;
|
||||
auto pCmdBuffer = this->pCmdBuffer;
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
commandStreamReceiver.setMediaVFEStateDirty(false);
|
||||
|
||||
// Set task levels to known values.
|
||||
@@ -206,7 +202,7 @@ HWTEST_F(BarrierTest, WhenEnqueingBarrierWithWaitListThenDependenciesShouldSync)
|
||||
auto pEvent = castToObject<Event>(event);
|
||||
auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
|
||||
|
||||
// in this case only cmdQ raises the taskLevel while csr stays intact
|
||||
// in this case only cmdQ raises the taskLevel why csr stay intact
|
||||
EXPECT_EQ(8u, pCmdQ->taskLevel);
|
||||
if (csr.peekTimestampPacketWriteEnabled()) {
|
||||
EXPECT_EQ(pCmdQ->taskLevel + 1, commandStreamReceiver.peekTaskLevel());
|
||||
|
||||
@@ -5,9 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
#include "shared/test/common/utilities/base_object_utils.h"
|
||||
@@ -370,7 +368,7 @@ HWTEST_F(OOQTaskTests, givenSkipDcFlushOnBarrierWithEventsEnabledWhenEnqueingBar
|
||||
outEvent->release();
|
||||
}
|
||||
|
||||
HWTEST_F(OOQTaskTests, givenSkipDcFlushOnBarrierWithoutEventsDisabledWhenEnqueingBarrierWithWaitListThenDcFlushSet) {
|
||||
HWTEST_F(OOQTaskTests, givenSkipDcFlushOnBarrierWithoutEventsDisableddWhenEnqueingBarrierWithWaitListThenDcFlushSet) {
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
if (false == commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
GTEST_SKIP();
|
||||
@@ -390,7 +388,7 @@ HWTEST_F(OOQTaskTests, givenSkipDcFlushOnBarrierWithoutEventsDisabledWhenEnquein
|
||||
EXPECT_TRUE(pCmdQ->isDcFlushRequiredOnStallingCommandsOnNextFlush());
|
||||
}
|
||||
|
||||
HWTEST_F(OOQTaskTests, givenEnqueueMarkerWithWaitListWhenIsMarkerWithPostSyncWriteThenBcsTimestampLastBarrierToWaitForIsNotEmpty) {
|
||||
HWTEST_F(OOQTaskTests, givenEnqueueMarkerWithWaitListWhenIsMarkerWithPostSyncWriteThenBcsTimestapLastBarrierToWaitForIsNotEmpty) {
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
if (false == commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
|
||||
GTEST_SKIP();
|
||||
|
||||
@@ -1755,7 +1755,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSetWhe
|
||||
size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags);
|
||||
EXPECT_EQ(expectedCmdSize, estimatedCmdSize);
|
||||
|
||||
commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags.barrierTimestampPacketNodes, false);
|
||||
commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
|
||||
EXPECT_EQ(estimatedCmdSize, commandStreamCSR.getUsed());
|
||||
|
||||
parseCommands<FamilyType>(commandStreamCSR, 0);
|
||||
|
||||
@@ -945,7 +945,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWh
|
||||
size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags);
|
||||
EXPECT_EQ(expectedCmdSize, estimatedCmdSize);
|
||||
|
||||
commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags.barrierTimestampPacketNodes, false);
|
||||
commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
|
||||
EXPECT_EQ(estimatedCmdSize, commandStreamCSR.getUsed());
|
||||
|
||||
parseCommands<FamilyType>(commandStreamCSR, 0);
|
||||
@@ -992,7 +992,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionDisabledW
|
||||
size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags);
|
||||
EXPECT_EQ(expectedCmdSize, estimatedCmdSize);
|
||||
|
||||
commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags.barrierTimestampPacketNodes, false);
|
||||
commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
|
||||
EXPECT_EQ(estimatedCmdSize, commandStreamCSR.getUsed());
|
||||
|
||||
parseCommands<FamilyType>(commandStreamCSR, 0);
|
||||
@@ -1045,7 +1045,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWh
|
||||
size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags);
|
||||
EXPECT_EQ(expectedSize, estimatedCmdSize);
|
||||
|
||||
commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags.barrierTimestampPacketNodes, false);
|
||||
commandStreamReceiver->programStallingCommandsForBarrier(commandStreamCSR, dispatchFlags);
|
||||
EXPECT_EQ(estimatedCmdSize, commandStreamCSR.getUsed());
|
||||
EXPECT_EQ(2u, tagNode->getPacketsUsed());
|
||||
|
||||
|
||||
@@ -591,8 +591,6 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
|
||||
size_t getCmdsSizeForComputeBarrierCommand() const override {
|
||||
return 0;
|
||||
}
|
||||
void programStallingCommandsForBarrier(LinearStream &cmdStream, TimestampPacketContainer *barrierTimestampPacketNodes, const bool isDcFlushRequired) override {
|
||||
}
|
||||
GraphicsAllocation *getClearColorAllocation() override { return nullptr; }
|
||||
|
||||
bool createPreemptionAllocation() override {
|
||||
|
||||
@@ -811,7 +811,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenStallingCommandsOnNextFlushWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) {
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWithEmptyWaitlistWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) {
|
||||
DebugManager.flags.OptimizeIoqBarriersHandling.set(0);
|
||||
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
@@ -827,7 +827,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenStallingCommandsOnNextFlushWhenReleasing
|
||||
cl_event waitlist0[] = {&userEvent0};
|
||||
cl_event waitlist1[] = {&userEvent1};
|
||||
|
||||
cmdQ->setStallingCommandsOnNextFlush(true);
|
||||
cmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr);
|
||||
cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, waitlist0, nullptr);
|
||||
cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, waitlist1, nullptr);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user