fix: use correct mask to extract IP in EUSS for all cores
Related-To: NEO-14920
Signed-off-by: shubham kumar <shubham.kumar@intel.com>
This commit is contained in:
parent
a5b82acf02
commit
a61181f1f9
|
@ -108,6 +108,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
|
|||
virtual bool stallIpDataMapUpdate(std::map<uint64_t, void *> &stallSumIpDataMap, const uint8_t *pRawIpData) = 0;
|
||||
virtual void stallIpDataMapDelete(std::map<uint64_t, void *> &stallSumIpDataMap) = 0;
|
||||
virtual uint32_t getIpSamplingMetricCount() = 0;
|
||||
virtual uint64_t getIpSamplingIpMask() const = 0;
|
||||
virtual bool synchronizedDispatchSupported() const = 0;
|
||||
virtual bool implicitSynchronizedDispatchForCooperativeKernelsAllowed() const = 0;
|
||||
virtual std::unique_ptr<NEO::TagAllocatorBase> getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, size_t tagAlignment,
|
||||
|
@ -165,6 +166,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
|
|||
bool stallIpDataMapUpdate(std::map<uint64_t, void *> &stallSumIpDataMap, const uint8_t *pRawIpData) override;
|
||||
void stallIpDataMapDelete(std::map<uint64_t, void *> &stallSumIpDataMap) override;
|
||||
uint32_t getIpSamplingMetricCount() override;
|
||||
uint64_t getIpSamplingIpMask() const override;
|
||||
bool synchronizedDispatchSupported() const override;
|
||||
bool implicitSynchronizedDispatchForCooperativeKernelsAllowed() const override;
|
||||
std::unique_ptr<NEO::TagAllocatorBase> getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, size_t tagAlignment,
|
||||
|
|
|
@ -144,4 +144,9 @@ std::vector<std::pair<const char *, const char *>> L0GfxCoreHelperHw<Family>::ge
|
|||
return stallSamplingReportList;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint64_t L0GfxCoreHelperHw<Family>::getIpSamplingIpMask() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
|
|
@ -75,6 +75,7 @@ typedef struct StallSumIpDataXeCore {
|
|||
#pragma pack()
|
||||
|
||||
constexpr uint32_t ipSamplingMetricCountXe2 = 11u;
|
||||
// Bit mask with the low 29 bits set. stallIpDataMapUpdate() ANDs it with the
// 64-bit value copied from the raw report to obtain the IP, and
// getIpSamplingIpMask() returns it to callers.
constexpr uint64_t ipSamplingIpMaskXe2 = 0x1fffffff;
|
||||
|
||||
template <typename Family>
|
||||
uint32_t L0GfxCoreHelperHw<Family>::getIpSamplingMetricCount() {
|
||||
|
@ -101,7 +102,7 @@ bool L0GfxCoreHelperHw<Family>::stallIpDataMapUpdate(std::map<uint64_t, void *>
|
|||
const uint8_t *tempAddr = pRawIpData;
|
||||
uint64_t ip = 0ULL;
|
||||
memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), tempAddr, sizeof(ip));
|
||||
ip &= 0x1fffffff;
|
||||
ip &= ipSamplingIpMaskXe2;
|
||||
StallSumIpDataXeCore_t *stallSumData = nullptr;
|
||||
if (stallSumIpDataMap.count(ip) == 0) {
|
||||
stallSumData = new StallSumIpDataXeCore_t{};
|
||||
|
@ -210,6 +211,11 @@ std::vector<std::pair<const char *, const char *>> L0GfxCoreHelperHw<Family>::ge
|
|||
return stallSamplingReportList;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint64_t L0GfxCoreHelperHw<Family>::getIpSamplingIpMask() const {
|
||||
return ipSamplingIpMaskXe2;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint64_t L0GfxCoreHelperHw<Family>::getOaTimestampValidBits() const {
|
||||
constexpr uint64_t oaTimestampValidBits = 56u;
|
||||
|
|
|
@ -69,6 +69,7 @@ typedef struct StallSumIpData {
|
|||
#pragma pack()
|
||||
|
||||
constexpr uint32_t ipSamplingMetricCountXe = 10u;
|
||||
// Bit mask with the low 29 bits set. stallIpDataMapUpdate() ANDs it with the
// 64-bit value copied from the raw report to obtain the IP, and
// getIpSamplingIpMask() returns it to callers.
constexpr uint64_t ipSamplingIpMaskXe = 0x1fffffff;
|
||||
|
||||
template <>
|
||||
uint32_t L0GfxCoreHelperHw<Family>::getIpSamplingMetricCount() {
|
||||
|
@ -96,7 +97,7 @@ bool L0GfxCoreHelperHw<Family>::stallIpDataMapUpdate(std::map<uint64_t, void *>
|
|||
const uint8_t *tempAddr = pRawIpData;
|
||||
uint64_t ip = 0ULL;
|
||||
memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), tempAddr, sizeof(ip));
|
||||
ip &= 0x1fffffff;
|
||||
ip &= ipSamplingIpMaskXe;
|
||||
StallSumIpData_t *stallSumData = nullptr;
|
||||
if (stallSumIpDataMap.count(ip) == 0) {
|
||||
stallSumData = new StallSumIpData_t{};
|
||||
|
@ -209,6 +210,11 @@ std::vector<std::pair<const char *, const char *>> L0GfxCoreHelperHw<Family>::ge
|
|||
return stallSamplingReportList;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint64_t L0GfxCoreHelperHw<Family>::getIpSamplingIpMask() const {
|
||||
return ipSamplingIpMaskXe;
|
||||
}
|
||||
|
||||
template class L0GfxCoreHelperHw<Family>;
|
||||
|
||||
} // namespace L0
|
||||
|
|
|
@ -110,5 +110,10 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenStallSumIpDataToTypedV
|
|||
EXPECT_EQ(0u, ipDataValues.size());
|
||||
}
|
||||
|
||||
// Gen12LP: the helper is expected to report an all-zero IP sampling mask.
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetIpSamplingIpMaskIsCalledThenZeroIsReturned) {
    auto &coreHelper = getHelper<L0GfxCoreHelper>();
    const auto reportedIpMask = coreHelper.getIpSamplingIpMask();
    EXPECT_EQ(0u, reportedIpMask);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
|
|
@ -139,6 +139,11 @@ XE2_HPG_CORETEST_F(L0GfxCoreHelperTestXe2Hpg, GivenXe2HpgWhenCheckingL0HelperFor
|
|||
EXPECT_NE(0u, stallSumIpDataMap.size());
|
||||
}
|
||||
|
||||
// Xe2 HPG: the helper is expected to report the 29-bit IP sampling mask.
XE2_HPG_CORETEST_F(L0GfxCoreHelperTestXe2Hpg, GivenXe2HpgWhenCheckingL0HelperForGetIpSamplingIpMaskThenCorrectValueIsReturned) {
    auto &coreHelper = getHelper<L0GfxCoreHelper>();
    const auto reportedIpMask = coreHelper.getIpSamplingIpMask();
    EXPECT_EQ(0x1fffffffull, reportedIpMask);
}
|
||||
|
||||
XE2_HPG_CORETEST_F(L0GfxCoreHelperTestXe2Hpg, GivenXe2HpgWhenCheckingL0HelperForGetOaTimestampValidBitsThenCorrectValueIsReturned) {
|
||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||
EXPECT_EQ(56u, l0GfxCoreHelper.getOaTimestampValidBits());
|
||||
|
|
|
@ -86,6 +86,11 @@ XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForDel
|
|||
EXPECT_NE(0u, stallSumIpDataMap.size());
|
||||
}
|
||||
|
||||
// Xe HPC: the helper is expected to report the 29-bit IP sampling mask.
XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForGetIpSamplingIpMaskThenCorrectValueIsReturned) {
    auto &coreHelper = getHelper<L0GfxCoreHelper>();
    const auto reportedIpMask = coreHelper.getIpSamplingIpMask();
    EXPECT_EQ(0x1fffffffull, reportedIpMask);
}
|
||||
|
||||
XE_HPC_CORETEST_F(L0GfxCoreHelperTestXeHpc, GivenXeHpcWhenCheckingL0HelperForGetOaTimestampValidBitsThenCorrectValueIsReturned) {
|
||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||
EXPECT_EQ(32u, l0GfxCoreHelper.getOaTimestampValidBits());
|
||||
|
|
|
@ -113,5 +113,10 @@ XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenStallSumIpDataToTypedV
|
|||
EXPECT_EQ(0u, ipDataValues.size());
|
||||
}
|
||||
|
||||
// Xe HPG: the helper is expected to report an all-zero IP sampling mask.
XE_HPG_CORETEST_F(L0GfxCoreHelperTestXeHpg, GivenXeHpgWhenGetIpSamplingIpMaskIsCalledThenZeroIsReturned) {
    auto &coreHelper = getHelper<L0GfxCoreHelper>();
    const auto reportedIpMask = coreHelper.getIpSamplingIpMask();
    EXPECT_EQ(0u, reportedIpMask);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
|
|
@ -414,12 +414,14 @@ ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricCount(const uint8_t *pR
|
|||
return ZE_RESULT_ERROR_INVALID_SIZE;
|
||||
}
|
||||
|
||||
DeviceImp *deviceImp = static_cast<DeviceImp *>(&this->getMetricSource().getMetricDeviceContext().getDevice());
|
||||
auto &l0GfxCoreHelper = deviceImp->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
|
||||
const uint32_t rawReportCount = static_cast<uint32_t>(rawDataSize) / rawReportSize;
|
||||
|
||||
for (const uint8_t *pRawIpData = pRawData; pRawIpData < pRawData + (rawReportCount * rawReportSize); pRawIpData += rawReportSize) {
|
||||
uint64_t ip = 0ULL;
|
||||
memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), pRawIpData, sizeof(ip));
|
||||
ip &= 0x1fffffff;
|
||||
ip &= l0GfxCoreHelper.getIpSamplingIpMask();
|
||||
stallReportIpCount.insert(ip);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue