diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl index 36de5cd0f2..ce129e3a47 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_xe2_hpg_and_later.inl @@ -35,4 +35,186 @@ bool L0GfxCoreHelperHw::forceDefaultUsmCompressionSupport() const { return true; } +/* + * Xe2 stall sample data item format: + * + * Bits Field + * 0 to 28 IP (addr) + * 29 to 36 tdr count + * 37 to 44 other count + * 45 to 52 control count + * 53 to 60 pipestall count + * 61 to 68 send count + * 69 to 76 dist_acc count + * 77 to 84 sbid count + * 85 to 92 sync count + * 93 to 100 inst_fetch count + * 101 to 108 active count + * Exid [109, 111] + * "EndFlag" - is always "1" 112 + * + * bytes at index 48 and 49, subSlice + * bytes at index 50 and 51, flags + * + * total size 64 bytes + */ + +#pragma pack(1) +typedef struct StallSumIpDataXe2 { + uint64_t tdrCount; + uint64_t otherCount; + uint64_t controlCount; + uint64_t pipeStallCount; + uint64_t sendCount; + uint64_t distAccCount; + uint64_t sbidCount; + uint64_t syncCount; + uint64_t instFetchCount; + uint64_t activeCount; +} StallSumIpDataXe2_t; +#pragma pack() + +constexpr uint32_t ipSamplingMetricCountXe2 = 11u; + +template +uint32_t L0GfxCoreHelperHw::getIpSamplingMetricCount() { + return ipSamplingMetricCountXe2; +} + +template +void L0GfxCoreHelperHw::stallIpDataMapDelete(std::map &stallSumIpDataMap) { + for (auto i = stallSumIpDataMap.begin(); i != stallSumIpDataMap.end(); i++) { + StallSumIpDataXe2_t *stallSumData = reinterpret_cast(i->second); + if (stallSumData) { + delete stallSumData; + i->second = nullptr; + } + } +} + +template +bool L0GfxCoreHelperHw::stallIpDataMapUpdate(std::map &stallSumIpDataMap, const uint8_t *pRawIpData) { + const uint8_t *tempAddr = pRawIpData; + uint64_t ip = 0ULL; + memcpy_s(reinterpret_cast(&ip), sizeof(ip), tempAddr, sizeof(ip)); + ip &= 0x1fffffff; + StallSumIpDataXe2_t *stallSumData = nullptr; + if (stallSumIpDataMap.count(ip) == 0) { + stallSumData = new StallSumIpDataXe2_t{}; + stallSumData->tdrCount = 0; + stallSumData->otherCount = 0; + stallSumData->controlCount = 0; + stallSumData->pipeStallCount = 0; + stallSumData->sendCount = 0; + stallSumData->distAccCount = 0; + stallSumData->sbidCount = 0; + stallSumData->syncCount = 0; + stallSumData->instFetchCount = 0; + stallSumData->activeCount = 0; + stallSumIpDataMap[ip] = stallSumData; + } else { + stallSumData = reinterpret_cast(stallSumIpDataMap[ip]); + } + tempAddr += ipStallSamplingOffset; + + auto getCount = [&tempAddr]() { + uint16_t tempCount = 0; + memcpy_s(reinterpret_cast(&tempCount), sizeof(tempCount), tempAddr, sizeof(tempCount)); + tempCount = (tempCount >> ipStallSamplingReportShift) & stallSamplingReportCategoryMask; + tempAddr += 1; + return static_cast(tempCount); + }; + + stallSumData->tdrCount += getCount(); + stallSumData->otherCount += getCount(); + stallSumData->controlCount += getCount(); + stallSumData->pipeStallCount += getCount(); + stallSumData->sendCount += getCount(); + stallSumData->distAccCount += getCount(); + stallSumData->sbidCount += getCount(); + stallSumData->syncCount += getCount(); + stallSumData->instFetchCount += getCount(); + stallSumData->activeCount += getCount(); + +#pragma pack(1) + struct StallCntrInfo { + uint16_t subslice; + uint16_t flags; + } stallCntrInfo = {}; +#pragma pack() + + tempAddr = pRawIpData + stallSamplingReportSubSliceAndFlagsOffset; + memcpy_s(reinterpret_cast(&stallCntrInfo), sizeof(stallCntrInfo), tempAddr, sizeof(stallCntrInfo)); + + constexpr int32_t overflowDropFlag = (1 << 8); + return stallCntrInfo.flags & overflowDropFlag; +} + +// Order of ipDataValues must match stallSamplingReportList +template +void L0GfxCoreHelperHw::stallSumIpDataToTypedValues(uint64_t ip, void *sumIpData, std::vector &ipDataValues) { + StallSumIpDataXe2_t *stallSumData = reinterpret_cast(sumIpData); + zet_typed_value_t tmpValueData; + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = ip; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->activeCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->tdrCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->controlCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->pipeStallCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->sendCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->distAccCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->sbidCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->syncCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->instFetchCount; + ipDataValues.push_back(tmpValueData); + + tmpValueData.type = ZET_VALUE_TYPE_UINT64; + tmpValueData.value.ui64 = stallSumData->otherCount; + ipDataValues.push_back(tmpValueData); +} + +template +std::vector> L0GfxCoreHelperHw::getStallSamplingReportMetrics() const { + std::vector> stallSamplingReportList = { + {"Active", "Active cycles"}, + {"Tdr", "Stall on Timeout Detection and Recovery"}, + {"ControlStall", "Stall on control"}, + {"PipeStall", "Stall on pipe"}, + {"SendStall", "Stall on send"}, + {"DistStall", "Stall on distance"}, + {"SbidStall", "Stall on scoreboard"}, + {"SyncStall", "Stall on sync"}, + {"InstrFetchStall", "Stall on instruction fetch"}, + {"OtherStall", "Stall on other condition"}, + }; + return stallSamplingReportList; +} + } // namespace L0 diff --git a/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp b/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp index 843e51fb4e..fa00f735d4 100644 --- a/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp +++ b/level_zero/core/source/xe2_hpg_core/l0_gfx_core_helper_xe2_hpg_core.cpp @@ -33,188 +33,6 @@ uint32_t L0GfxCoreHelperHw::getGrfRegisterCount(uint32_t *regPtr) const return 128; } -/* - * Xe2 stall sample data item format: - * - * Bits Field - * 0 to 28 IP (addr) - * 29 to 36 tdr count - * 37 to 44 other count - * 45 to 52 control count - * 53 to 60 pipestall count - * 61 to 68 send count - * 69 to 76 dist_acc count - * 77 to 84 sbid count - * 85 to 92 sync count - * 93 to 100 inst_fetch count - * 101 to 108 active count - * Exid [109, 111] - * "EndFlag" - is always "1" 112 - * - * bytes at index 48 and 49, subSlice - * bytes at index 50 and 51, flags - * - * total size 64 bytes - */ - -#pragma pack(1) -typedef struct StallSumIpDataXe2 { - uint64_t tdrCount; - uint64_t otherCount; - uint64_t controlCount; - uint64_t pipeStallCount; - uint64_t sendCount; - uint64_t distAccCount; - uint64_t sbidCount; - uint64_t syncCount; - uint64_t instFetchCount; - uint64_t activeCount; -} StallSumIpDataXe2_t; -#pragma pack() - -constexpr uint32_t ipSamplingMetricCountXe2 = 11u; - -template -uint32_t L0GfxCoreHelperHw::getIpSamplingMetricCount() { - return ipSamplingMetricCountXe2; -} - -template -void L0GfxCoreHelperHw::stallIpDataMapDelete(std::map &stallSumIpDataMap) { - for (auto i = stallSumIpDataMap.begin(); i != stallSumIpDataMap.end(); i++) { - StallSumIpDataXe2_t *stallSumData = reinterpret_cast(i->second); - if (stallSumData) { - delete stallSumData; - i->second = nullptr; - } - } -} - -template -bool L0GfxCoreHelperHw::stallIpDataMapUpdate(std::map &stallSumIpDataMap, const uint8_t *pRawIpData) { - const uint8_t *tempAddr = pRawIpData; - uint64_t ip = 0ULL; - memcpy_s(reinterpret_cast(&ip), sizeof(ip), tempAddr, sizeof(ip)); - ip &= 0x1fffffff; - StallSumIpDataXe2_t *stallSumData = nullptr; - if (stallSumIpDataMap.count(ip) == 0) { - stallSumData = new StallSumIpDataXe2_t{}; - stallSumData->tdrCount = 0; - stallSumData->otherCount = 0; - stallSumData->controlCount = 0; - stallSumData->pipeStallCount = 0; - stallSumData->sendCount = 0; - stallSumData->distAccCount = 0; - stallSumData->sbidCount = 0; - stallSumData->syncCount = 0; - stallSumData->instFetchCount = 0; - stallSumData->activeCount = 0; - stallSumIpDataMap[ip] = stallSumData; - } else { - stallSumData = reinterpret_cast(stallSumIpDataMap[ip]); - } - tempAddr += ipStallSamplingOffset; - - auto getCount = [&tempAddr]() { - uint16_t tempCount = 0; - memcpy_s(reinterpret_cast(&tempCount), sizeof(tempCount), tempAddr, sizeof(tempCount)); - tempCount = (tempCount >> ipStallSamplingReportShift) & stallSamplingReportCategoryMask; - tempAddr += 1; - return static_cast(tempCount); - }; - - stallSumData->tdrCount += getCount(); - stallSumData->otherCount += getCount(); - stallSumData->controlCount += getCount(); - stallSumData->pipeStallCount += getCount(); - stallSumData->sendCount += getCount(); - stallSumData->distAccCount += getCount(); - stallSumData->sbidCount += getCount(); - stallSumData->syncCount += getCount(); - stallSumData->instFetchCount += getCount(); - stallSumData->activeCount += getCount(); - -#pragma pack(1) - struct StallCntrInfo { - uint16_t subslice; - uint16_t flags; - } stallCntrInfo = {}; -#pragma pack() - - tempAddr = pRawIpData + stallSamplingReportSubSliceAndFlagsOffset; - memcpy_s(reinterpret_cast(&stallCntrInfo), sizeof(stallCntrInfo), tempAddr, sizeof(stallCntrInfo)); - - constexpr int32_t overflowDropFlag = (1 << 8); - return stallCntrInfo.flags & overflowDropFlag; -} - -// Order of ipDataValues must match stallSamplingReportList -template -void L0GfxCoreHelperHw::stallSumIpDataToTypedValues(uint64_t ip, void *sumIpData, std::vector &ipDataValues) { - StallSumIpDataXe2_t *stallSumData = reinterpret_cast(sumIpData); - zet_typed_value_t tmpValueData; - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = ip; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->activeCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->tdrCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->controlCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->pipeStallCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->sendCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->distAccCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->sbidCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->syncCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->instFetchCount; - ipDataValues.push_back(tmpValueData); - - tmpValueData.type = ZET_VALUE_TYPE_UINT64; - tmpValueData.value.ui64 = stallSumData->otherCount; - ipDataValues.push_back(tmpValueData); -} - -template -std::vector> L0GfxCoreHelperHw::getStallSamplingReportMetrics() const { - std::vector> stallSamplingReportList = { - {"Active", "Active cycles"}, - {"Tdr", "Stall on Timeout Detection and Recovery"}, - {"ControlStall", "Stall on control"}, - {"PipeStall", "Stall on pipe"}, - {"SendStall", "Stall on send"}, - {"DistStall", "Stall on distance"}, - {"SbidStall", "Stall on scoreboard"}, - {"SyncStall", "Stall on sync"}, - {"InstrFetchStall", "Stall on instruction fetch"}, - {"OtherStall", "Stall on other condition"}, - }; - return stallSamplingReportList; -} - #include "level_zero/core/source/helpers/l0_gfx_core_helper_factory_init.inl" template class L0GfxCoreHelperHw;