Metrics: Restore addressOffsetCCSOffset after query programming

Related-To: LOCI-2711
Signed-off-by: Joshua Santosh Ranjan <joshua.santosh.ranjan@intel.com>
This commit is contained in:
parent
b91cec5655
commit
d15eed035b
|
@ -153,6 +153,7 @@ struct CommandListCoreFamily : CommandListImp {
|
|||
size_t size, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
|
||||
void appendMultiPartitionEpilogue() override;
|
||||
|
||||
ze_result_t reserveSpace(size_t size, void **ptr) override;
|
||||
ze_result_t reset() override;
|
||||
|
|
|
@ -178,6 +178,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
|||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
size_t estimatedSizeRequired = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
|
||||
|
|
|
@ -318,6 +318,17 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t
|
|||
true);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
|
||||
|
||||
const size_t estimatedSizeRequired = sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
|
||||
increaseCommandStreamSpace(estimatedSizeRequired);
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(commandContainer,
|
||||
NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset,
|
||||
CommonConstants::partitionAddressOffset,
|
||||
true);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendComputeBarrierCommand() {
|
||||
NEO::PipeControlArgs args = createBarrierFlags();
|
||||
|
|
|
@ -25,6 +25,7 @@ struct CommandListImp : CommandList {
|
|||
ze_result_t appendMetricQueryEnd(zet_metric_query_handle_t hMetricQuery, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
virtual void appendMultiPartitionPrologue(uint32_t partitionDataSize) = 0;
|
||||
virtual void appendMultiPartitionEpilogue() = 0;
|
||||
|
||||
protected:
|
||||
~CommandListImp() override = default;
|
||||
|
|
|
@ -327,6 +327,7 @@ struct MockCommandList : public CommandList {
|
|||
ze_command_list_flags_t flags));
|
||||
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(appendMultiPartitionPrologue, (uint32_t partitionDataSize));
|
||||
ADDMETHOD_NOBASE_VOIDRETURN(appendMultiPartitionEpilogue, (void));
|
||||
|
||||
uint8_t *batchBuffer = nullptr;
|
||||
NEO::GraphicsAllocation *mockAllocation = nullptr;
|
||||
|
|
|
@ -79,6 +79,58 @@ HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledTh
|
|||
auto result = commandList->close();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
using MultiPartitionEpilogueTest = Test<DeviceFixture>;
|
||||
HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) {
|
||||
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
|
||||
auto &commandContainer = commandList->commandContainer;
|
||||
|
||||
ASSERT_NE(nullptr, commandContainer.getCommandStream());
|
||||
auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed();
|
||||
|
||||
CommandListImp *cmdListImp = static_cast<CommandListImp *>(commandList.get());
|
||||
cmdListImp->appendMultiPartitionEpilogue();
|
||||
|
||||
auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
|
||||
|
||||
auto itorPc = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itorPc);
|
||||
|
||||
auto lriCmdPc = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorPc);
|
||||
ASSERT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmdPc->getRegisterOffset()));
|
||||
ASSERT_EQ(static_cast<uint32_t>(lriCmdPc->getDataDword()), CommonConstants::partitionAddressOffset);
|
||||
ASSERT_EQ(lriCmdPc->getMmioRemapEnable(), true);
|
||||
|
||||
auto result = commandList->close();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
// Checks that appendMultiPartitionEpilogue() leaves the command stream usage
// unchanged on platforms matched by IsAtMostGen12lp.
// NOTE(review): the test name says "Prologue" but the body exercises the epilogue;
// the name is kept as-is so existing test filters keep matching - consider renaming.
HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) {
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
    // Fail cleanly instead of dereferencing a null command list below.
    ASSERT_NE(nullptr, commandList.get());
    auto &commandContainer = commandList->commandContainer;

    ASSERT_NE(nullptr, commandContainer.getCommandStream());
    const auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed();

    auto *cmdListImp = static_cast<CommandListImp *>(commandList.get());
    cmdListImp->appendMultiPartitionEpilogue();

    // No command is expected, so the used size must not move.
    const auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed();
    ASSERT_EQ(usedSpaceBefore, usedSpaceAfter);

    auto result = commandList->close();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
|
|
@ -773,6 +773,7 @@ ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_
|
|||
const size_t allocationSizeForSubDevice = pool.allocationSize / metricQueriesSize;
|
||||
static_cast<CommandListImp &>(commandList).appendMultiPartitionPrologue(static_cast<uint32_t>(allocationSizeForSubDevice));
|
||||
void *buffer = nullptr;
|
||||
bool gpuCommandStatus = true;
|
||||
|
||||
// Revert iteration to be ensured that the last set of gpu commands overwrite the previous written sets of gpu commands,
|
||||
// so only one of the sub-device contexts will be used to append to command list.
|
||||
|
@ -817,10 +818,15 @@ ze_result_t MetricQueryImp::writeMetricQuery(CommandList &commandList, ze_event_
|
|||
|
||||
// Obtain gpu commands from metrics library for each sub-device to update cpu and gpu addresses for
|
||||
// each query object in metrics library, so that get data works properly.
|
||||
if (!metricLibrarySubDevice.getGpuCommands(commandBuffer)) {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
gpuCommandStatus = metricLibrarySubDevice.getGpuCommands(commandBuffer);
|
||||
if (!gpuCommandStatus) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
static_cast<CommandListImp &>(commandList).appendMultiPartitionEpilogue();
|
||||
if (!gpuCommandStatus) {
|
||||
return ZE_RESULT_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
// Write gpu commands for sub device index 0.
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue