mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
feature: program in-order barrier as non stalling instruction
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c5d675570a
commit
f9a58b5544
@@ -2921,7 +2921,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
|
||||
appendEventForProfiling(signalEvent, true);
|
||||
|
||||
if (isCopyOnly()) {
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
appendSignalInOrderDependencyCounter();
|
||||
} else if (isCopyOnly()) {
|
||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||
uint64_t gpuAddress = 0u;
|
||||
TaskCountType value = 0u;
|
||||
|
||||
@@ -385,6 +385,23 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
ze_result_t ret = ZE_RESULT_SUCCESS;
|
||||
|
||||
if (isInOrderExecutionEnabled() && numWaitEvents == 0) {
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
bool earlyReturn = true;
|
||||
|
||||
if (signalEvent) {
|
||||
if (signalEvent->isEventTimestampFlagSet()) {
|
||||
earlyReturn = false;
|
||||
} else {
|
||||
signalEvent->enableInOrderExecMode(*this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter);
|
||||
}
|
||||
}
|
||||
|
||||
if (earlyReturn) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
checkAvailableSpace(numWaitEvents, false);
|
||||
checkWaitEventsState(numWaitEvents, phWaitEvents);
|
||||
|
||||
@@ -395,12 +395,6 @@ void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
uint64_t gpuWriteAddress = 0;
|
||||
uint64_t writeValue = 0;
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
postSyncMode = NEO::PostSyncMode::ImmediateData;
|
||||
gpuWriteAddress = this->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
writeValue = this->inOrderDependencyCounter + 1;
|
||||
}
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), postSyncMode, gpuWriteAddress, writeValue, args);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1354,18 +1354,58 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenSignalSyncAllocation, IsAtLeastXeHpCore) {
|
||||
HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingBarrierThenSignalInOrderAllocation, IsAtLeastXeHpCore) {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
auto immCmdList = createCopyOnlyImmCmdList<gfxCoreFamily>();
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
uint32_t copyData = 0;
|
||||
|
||||
immCmdList->appendMemoryCopy(&copyData, &copyData, 1, eventHandle, 0, nullptr, false, false);
|
||||
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
immCmdList->appendBarrier(nullptr, 1, &eventHandle);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), sdiItor);
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithWaitlistThenSignalSyncAllocation, IsAtLeastXeHpCore) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, eventHandle, 0, nullptr, launchParams, false);
|
||||
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
immCmdList->appendBarrier(nullptr, 0, nullptr);
|
||||
immCmdList->appendBarrier(nullptr, 1, &eventHandle);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
@@ -1373,18 +1413,79 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierThenS
|
||||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
auto pcItor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), pcItor);
|
||||
EXPECT_EQ(cmdList.end(), pcItor);
|
||||
|
||||
auto pcCmd = genCmdCast<PIPE_CONTROL *>(*pcItor);
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), sdiItor);
|
||||
|
||||
auto gpuAddress = immCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
|
||||
auto lowAddress = static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL);
|
||||
auto highAddress = static_cast<uint32_t>(gpuAddress >> 32);
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
EXPECT_EQ(lowAddress, pcCmd->getAddress());
|
||||
EXPECT_EQ(highAddress, pcCmd->getAddressHigh());
|
||||
EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pcCmd->getPostSyncOperation());
|
||||
EXPECT_EQ(2u, pcCmd->getImmediateData());
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithoutWaitlistThenInheritSignalSyncAllocation, IsAtLeastXeHpCore) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
|
||||
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
immCmdList->appendBarrier(nullptr, 0, nullptr);
|
||||
immCmdList->appendBarrier(eventHandle, 0, nullptr);
|
||||
|
||||
EXPECT_EQ(offset, cmdStream->getUsed());
|
||||
|
||||
EXPECT_EQ(1u, events[0]->inOrderExecSignalValue);
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWithoutWaitlistAndTimestampEventThenSignalSyncAllocation, IsAtLeastXeHpCore) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
|
||||
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, true);
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
immCmdList->appendBarrier(eventHandle, 0, nullptr);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||
(cmdStream->getUsed() - offset)));
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), sdiItor);
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(1u, sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenCallingSyncThenHandleCompletion, IsAtLeastXeHpCore) {
|
||||
|
||||
Reference in New Issue
Block a user