fix: use correct mask to extract IP in EUSS for all cores

Related-To: NEO-14920

Signed-off-by: shubham kumar <shubham.kumar@intel.com>
This commit is contained in:
shubham kumar 2025-05-12 15:43:11 +00:00 committed by Compute-Runtime-Automation
parent a5b82acf02
commit a61181f1f9
9 changed files with 44 additions and 3 deletions

View File

@ -108,6 +108,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
virtual bool stallIpDataMapUpdate(std::map<uint64_t, void *> &stallSumIpDataMap, const uint8_t *pRawIpData) = 0;
virtual void stallIpDataMapDelete(std::map<uint64_t, void *> &stallSumIpDataMap) = 0;
virtual uint32_t getIpSamplingMetricCount() = 0;
// Returns the bit mask that is AND-ed with the raw 64-bit value copied from the start of an
// IP sampling report in order to extract the instruction pointer (IP).
virtual uint64_t getIpSamplingIpMask() const = 0;
virtual bool synchronizedDispatchSupported() const = 0;
virtual bool implicitSynchronizedDispatchForCooperativeKernelsAllowed() const = 0;
virtual std::unique_ptr<NEO::TagAllocatorBase> getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, size_t tagAlignment,
@ -165,6 +166,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
bool stallIpDataMapUpdate(std::map<uint64_t, void *> &stallSumIpDataMap, const uint8_t *pRawIpData) override;
void stallIpDataMapDelete(std::map<uint64_t, void *> &stallSumIpDataMap) override;
uint32_t getIpSamplingMetricCount() override;
// Family-specific IP sampling mask; the concrete value is supplied by the per-family definition.
uint64_t getIpSamplingIpMask() const override;
bool synchronizedDispatchSupported() const override;
bool implicitSynchronizedDispatchForCooperativeKernelsAllowed() const override;
std::unique_ptr<NEO::TagAllocatorBase> getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, size_t tagAlignment,

View File

@ -144,4 +144,9 @@ std::vector<std::pair<const char *, const char *>> L0GfxCoreHelperHw<Family>::ge
return stallSamplingReportList;
}
// Generic definition: reports an all-zero IP sampling mask, i.e. no IP bits are selected.
// NOTE(review): presumably shared by cores without IP sampling support - confirm against the includers.
template <typename Family>
uint64_t L0GfxCoreHelperHw<Family>::getIpSamplingIpMask() const {
    constexpr uint64_t emptyIpMask = 0u;
    return emptyIpMask;
}
} // namespace L0

View File

@ -75,6 +75,7 @@ typedef struct StallSumIpDataXeCore {
#pragma pack()
// IP sampling metric count used by the Xe2 helper.
constexpr uint32_t ipSamplingMetricCountXe2{11u};
// Keeps the low 29 bits (0x1fffffff) of the raw 64-bit value read from a report;
// stallIpDataMapUpdate applies it to obtain the IP used as the map key.
constexpr uint64_t ipSamplingIpMaskXe2 = (1ull << 29u) - 1ull;
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getIpSamplingMetricCount() {
@ -101,7 +102,7 @@ bool L0GfxCoreHelperHw<Family>::stallIpDataMapUpdate(std::map<uint64_t, void *>
const uint8_t *tempAddr = pRawIpData;
uint64_t ip = 0ULL;
memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), tempAddr, sizeof(ip));
ip &= 0x1fffffff;
ip &= ipSamplingIpMaskXe2;
StallSumIpDataXeCore_t *stallSumData = nullptr;
if (stallSumIpDataMap.count(ip) == 0) {
stallSumData = new StallSumIpDataXeCore_t{};
@ -210,6 +211,11 @@ std::vector<std::pair<const char *, const char *>> L0GfxCoreHelperHw<Family>::ge
return stallSamplingReportList;
}
// Exposes the Xe2 IP mask - the same constant stallIpDataMapUpdate applies to raw report data.
template <typename Family>
uint64_t L0GfxCoreHelperHw<Family>::getIpSamplingIpMask() const {
    constexpr uint64_t ipMask = ipSamplingIpMaskXe2;
    return ipMask;
}
template <typename Family>
uint64_t L0GfxCoreHelperHw<Family>::getOaTimestampValidBits() const {
constexpr uint64_t oaTimestampValidBits = 56u;

View File

@ -69,6 +69,7 @@ typedef struct StallSumIpData {
#pragma pack()
// IP sampling metric count used by the Xe helper.
constexpr uint32_t ipSamplingMetricCountXe{10u};
// Keeps the low 29 bits (0x1fffffff) of the raw 64-bit value read from a report;
// stallIpDataMapUpdate applies it to obtain the IP used as the map key.
constexpr uint64_t ipSamplingIpMaskXe = (1ull << 29u) - 1ull;
template <>
uint32_t L0GfxCoreHelperHw<Family>::getIpSamplingMetricCount() {
@ -96,7 +97,7 @@ bool L0GfxCoreHelperHw<Family>::stallIpDataMapUpdate(std::map<uint64_t, void *>
const uint8_t *tempAddr = pRawIpData;
uint64_t ip = 0ULL;
memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), tempAddr, sizeof(ip));
ip &= 0x1fffffff;
ip &= ipSamplingIpMaskXe;
StallSumIpData_t *stallSumData = nullptr;
if (stallSumIpDataMap.count(ip) == 0) {
stallSumData = new StallSumIpData_t{};
@ -209,6 +210,11 @@ std::vector<std::pair<const char *, const char *>> L0GfxCoreHelperHw<Family>::ge
return stallSamplingReportList;
}
// Exposes the Xe IP mask - the same constant stallIpDataMapUpdate applies to raw report data.
template <>
uint64_t L0GfxCoreHelperHw<Family>::getIpSamplingIpMask() const {
    constexpr uint64_t ipMask = ipSamplingIpMaskXe;
    return ipMask;
}
template class L0GfxCoreHelperHw<Family>;
} // namespace L0

View File

@ -110,5 +110,10 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenStallSumIpDataToTypedV
EXPECT_EQ(0u, ipDataValues.size());
}
// Gen12LP: the helper is expected to report a zero IP sampling mask.
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetIpSamplingIpMaskIsCalledThenZeroIsReturned) {
    const auto &helper = getHelper<L0GfxCoreHelper>();
    const uint64_t ipMask = helper.getIpSamplingIpMask();
    EXPECT_EQ(0u, ipMask);
}
} // namespace ult
} // namespace L0

View File

@ -139,6 +139,11 @@ XE2_HPG_CORETEST_F(L0GfxCoreHelperTestXe2Hpg, GivenXe2HpgWhenCheckingL0HelperFor
EXPECT_NE(0u, stallSumIpDataMap.size());
}
// Xe2 HPG: the helper is expected to report the 29-bit IP sampling mask.
XE2_HPG_CORETEST_F(L0GfxCoreHelperTestXe2Hpg, GivenXe2HpgWhenCheckingL0HelperForGetIpSamplingIpMaskThenCorrectValueIsReturned) {
    const auto &helper = getHelper<L0GfxCoreHelper>();
    const uint64_t ipMask = helper.getIpSamplingIpMask();
    EXPECT_EQ(0x1fffffffull, ipMask);
}
XE2_HPG_CORETEST_F(L0GfxCoreHelperTestXe2Hpg, GivenXe2HpgWhenCheckingL0HelperForGetOaTimestampValidBitsThenCorrectValueIsReturned) {
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
EXPECT_EQ(56u, l0GfxCoreHelper.getOaTimestampValidBits());

View File

@ -86,6 +86,11 @@ XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForDel
EXPECT_NE(0u, stallSumIpDataMap.size());
}
// Xe HPC: the helper is expected to report the 29-bit IP sampling mask.
XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForGetIpSamplingIpMaskThenCorrectValueIsReturned) {
    const auto &helper = getHelper<L0GfxCoreHelper>();
    const uint64_t ipMask = helper.getIpSamplingIpMask();
    EXPECT_EQ(0x1fffffffull, ipMask);
}
XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForGetOaTimestampValidBitsThenCorrectValueIsReturned) {
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
EXPECT_EQ(32u, l0GfxCoreHelper.getOaTimestampValidBits());

View File

@ -113,5 +113,10 @@ XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenStallSumIpDataToTypedV
EXPECT_EQ(0u, ipDataValues.size());
}
// Xe HPG: the helper is expected to report a zero IP sampling mask.
XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenGetIpSamplingIpMaskIsCalledThenZeroIsReturned) {
    const auto &helper = getHelper<L0GfxCoreHelper>();
    const uint64_t ipMask = helper.getIpSamplingIpMask();
    EXPECT_EQ(0u, ipMask);
}
} // namespace ult
} // namespace L0

View File

@ -414,12 +414,14 @@ ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricCount(const uint8_t *pR
return ZE_RESULT_ERROR_INVALID_SIZE;
}
DeviceImp *deviceImp = static_cast<DeviceImp *>(&this->getMetricSource().getMetricDeviceContext().getDevice());
auto &l0GfxCoreHelper = deviceImp->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
const uint32_t rawReportCount = static_cast<uint32_t>(rawDataSize) / rawReportSize;
for (const uint8_t *pRawIpData = pRawData; pRawIpData < pRawData + (rawReportCount * rawReportSize); pRawIpData += rawReportSize) {
uint64_t ip = 0ULL;
memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), pRawIpData, sizeof(ip));
ip &= 0x1fffffff;
ip &= l0GfxCoreHelper.getIpSamplingIpMask();
stallReportIpCount.insert(ip);
}