Metrics: Restore addressOffsetCCSOffset after query programming

Related-To: LOCI-2711

Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
Joshua Santosh Ranjan 2021-11-10 13:26:03 +00:00 committed by Compute-Runtime-Automation
parent b91cec5655
commit d15eed035b
7 changed files with 77 additions and 2 deletions

View File

@ -153,6 +153,7 @@ struct CommandListCoreFamily : CommandListImp {
size_t size, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
void appendMultiPartitionEpilogue() override;
ze_result_t reserveSpace(size_t size, void **ptr) override;
ze_result_t reset() override;

View File

@ -178,6 +178,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
// No-op variant of the multi-partition prologue: the body is empty, so
// partitionDataSize is ignored and nothing is appended to the command list.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {}
// No-op variant of the multi-partition epilogue: the body is empty, so calling
// it appends nothing to the command list.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();

View File

@ -318,6 +318,17 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t
true);
}
// Programs the addressOffsetCCSOffset partition register with
// CommonConstants::partitionAddressOffset by encoding one immediate register
// write into the command container.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    // Reserve room for the single register write emitted below.
    increaseCommandStreamSpace(sizeof(MI_LOAD_REGISTER_IMM));

    const auto registerOffset = NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset;
    const auto registerValue = CommonConstants::partitionAddressOffset;
    // The trailing flag is passed as true, exactly as before; the accompanying
    // unit test expects the resulting command to have MMIO remap enabled.
    NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(commandContainer, registerOffset, registerValue, true);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
NEO::PipeControlArgs args = createBarrierFlags();

View File

@ -25,6 +25,7 @@ struct CommandListImp : CommandList {
ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
virtual void appendMultiPartitionPrologue(uint32_t partitionDataSize) = 0;
virtual void appendMultiPartitionEpilogue() = 0;
protected:
~CommandListImp() override = default;

View File

@ -327,6 +327,7 @@ struct MockCommandList : public CommandList {
ze_command_list_flags_t flags));
ADDMETHOD_NOBASE_VOIDRETURN(appendMultiPartitionPrologue, (uint32_t partitionDataSize));
ADDMETHOD_NOBASE_VOIDRETURN(appendMultiPartitionEpilogue, (void));
uint8_t *batchBuffer = nullptr;
NEO::GraphicsAllocation *mockAllocation = nullptr;

View File

@ -79,6 +79,58 @@ HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledTh
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
// Epilogue tests run on the plain device fixture.
using MultiPartitionEpilogueTest = Test<DeviceFixture>;

// On XeHP core and newer, appendMultiPartitionEpilogue must append an
// MI_LOAD_REGISTER_IMM that writes CommonConstants::partitionAddressOffset to
// the addressOffsetCCSOffset partition register with MMIO remap enabled.
HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    ze_result_t returnValue = ZE_RESULT_ERROR_UNINITIALIZED;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    // Fail the test cleanly instead of dereferencing a null command list below.
    ASSERT_NE(nullptr, commandList.get());
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto &commandContainer = commandList->commandContainer;
    auto *commandStream = commandContainer.getCommandStream();
    ASSERT_NE(nullptr, commandStream);

    const auto usedSpaceBefore = commandStream->getUsed();
    auto *cmdListImp = static_cast<CommandListImp *>(commandList.get());
    cmdListImp->appendMultiPartitionEpilogue();
    const auto usedSpaceAfter = commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    // Parse only the bytes appended by the epilogue so that a register write
    // already present in the stream cannot be mistaken for the one under test.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter - usedSpaceBefore));

    auto itorLri = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorLri);

    auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLri);
    ASSERT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmd->getRegisterOffset()));
    ASSERT_EQ(static_cast<uint32_t>(lriCmd->getDataDword()), CommonConstants::partitionAddressOffset);
    ASSERT_EQ(lriCmd->getMmioRemapEnable(), true);

    auto result = commandList->close();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
// Up to Gen12LP, appendMultiPartitionEpilogue must leave the command stream
// untouched. The test name previously said "Prologue" although the body
// exercises the epilogue; it now names the function actually under test.
HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) {
    ze_result_t returnValue = ZE_RESULT_ERROR_UNINITIALIZED;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    // Fail the test cleanly instead of dereferencing a null command list below.
    ASSERT_NE(nullptr, commandList.get());
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto &commandContainer = commandList->commandContainer;
    auto *commandStream = commandContainer.getCommandStream();
    ASSERT_NE(nullptr, commandStream);

    const auto usedSpaceBefore = commandStream->getUsed();
    auto *cmdListImp = static_cast<CommandListImp *>(commandList.get());
    cmdListImp->appendMultiPartitionEpilogue();
    const auto usedSpaceAfter = commandStream->getUsed();
    // Nothing may be appended on these platforms.
    ASSERT_EQ(usedSpaceAfter, usedSpaceBefore);

    auto result = commandList->close();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
} // namespace ult
} // namespace L0

View File

@ -773,6 +773,7 @@ ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_
const size_t allocationSizeForSubDevice = pool.allocationSize / metricQueriesSize;
static_cast<CommandListImp &>(commandList).appendMultiPartitionPrologue(static_cast<uint32_t>(allocationSizeForSubDevice));
void *buffer = nullptr;
bool gpuCommandStatus = true;
// Iterate in reverse order to ensure that the last set of gpu commands overwrites the previously written sets of gpu commands,
// so only one of the sub-device contexts will be used to append to the command list.
@ -817,10 +818,15 @@ ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_
// Obtain gpu commands from metrics library for each sub-device to update cpu and gpu addresses for
// each query object in metrics library, so that get data works properly.
if (!metricLibrarySubDevice.getGpuCommands(commandBuffer)) {
return ZE_RESULT_ERROR_UNKNOWN;
gpuCommandStatus = metricLibrarySubDevice.getGpuCommands(commandBuffer);
if (!gpuCommandStatus) {
break;
}
}
static_cast<CommandListImp &>(commandList).appendMultiPartitionEpilogue();
if (!gpuCommandStatus) {
return ZE_RESULT_ERROR_UNKNOWN;
}
// Write gpu commands for sub device index 0.
} else {