mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
L0 debug - fix windows bitmask decode
Keep threads created in EU range (0,7) Signed-off-by: Yates, Brandon <brandon.yates@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ac426f7fdc
commit
99ef6c499b
@@ -7,7 +7,8 @@
|
||||
|
||||
template <>
|
||||
void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector<EuThread::ThreadId> &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr<uint8_t[]> &bitmask, size_t &bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
|
||||
const uint32_t numSubslicesPerSlice = (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
|
||||
@@ -24,11 +25,20 @@ void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector
|
||||
|
||||
for (auto &thread : threads) {
|
||||
uint8_t *sliceData = ptrOffset(bitmask.get(), threadsSizePerSlice * thread.slice);
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
|
||||
uint8_t *euData;
|
||||
|
||||
if (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) {
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * (thread.subslice / 2));
|
||||
auto eu = thread.eu % eusPerRow;
|
||||
auto dualEu = thread.subslice % 2;
|
||||
euData = ptrOffset(subsliceData, bytesPerEu * (eu + dualEu * eusPerRow));
|
||||
} else {
|
||||
uint8_t *subsliceData = ptrOffset(sliceData, numEuPerSubslice * bytesPerEu * thread.subslice);
|
||||
auto eu = thread.eu % eusPerRow;
|
||||
auto dualEu = thread.eu / (numberOfRows * eusPerRow);
|
||||
euData = ptrOffset(subsliceData, bytesPerEu * (eu + dualEu * eusPerRow));
|
||||
}
|
||||
|
||||
auto eu = thread.eu % eusPerRow;
|
||||
auto dualEu = thread.eu / (numberOfRows * eusPerRow);
|
||||
uint8_t *euData = ptrOffset(subsliceData, bytesPerEu * (eu + dualEu * eusPerRow));
|
||||
UNRECOVERABLE_IF(thread.thread > 7);
|
||||
*euData |= (1 << thread.thread);
|
||||
}
|
||||
@@ -36,7 +46,8 @@ void L0HwHelperHw<Family>::getAttentionBitmaskForSingleThreads(const std::vector
|
||||
|
||||
template <>
|
||||
std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const {
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
|
||||
const uint32_t numSubslicesPerSlice = (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u);
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8;
|
||||
@@ -66,8 +77,13 @@ std::vector<EuThread::ThreadId> L0HwHelperHw<Family>::getThreadsFromAttentionBit
|
||||
std::bitset<8> bits(bitmask[offset]);
|
||||
for (uint32_t i = 0; i < 8; i++) {
|
||||
if (bits.test(i)) {
|
||||
threads.emplace_back(tile, slice, subslice, euIndex + numEuPerSubslice * dualEu, i);
|
||||
threads.emplace_back(tile, slice, subslice, euIndex + eusPerRow + numEuPerSubslice * dualEu, i);
|
||||
if (hwInfo.gtSystemInfo.MaxEuPerSubSlice == 8) {
|
||||
threads.emplace_back(tile, slice, (subslice * 2) + dualEu, euIndex, i);
|
||||
threads.emplace_back(tile, slice, (subslice * 2) + dualEu, euIndex + eusPerRow, i);
|
||||
} else {
|
||||
threads.emplace_back(tile, slice, subslice, euIndex + numEuPerSubslice * dualEu, i);
|
||||
threads.emplace_back(tile, slice, subslice, euIndex + eusPerRow + numEuPerSubslice * dualEu, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -378,6 +378,41 @@ HWTEST_F(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettin
|
||||
using PlatformsWithFusedEus = IsWithinGfxCore<IGFX_GEN12LP_CORE, IGFX_XE_HPG_CORE>;
|
||||
using L0HwHelperFusedEuTest = ::testing::Test;
|
||||
|
||||
// Round-trip test for the attention bitmask helpers when the hardware info
// reports 8 EUs per subslice: a single thread is encoded with
// getAttentionBitmaskForSingleThreads() and decoded again with
// getThreadsFromAttentionBitmask(). The decoded list is expected to hold
// exactly two entries that keep the original slice/subslice/thread/tile,
// one at EU 0 and one at EU 4.
HWTEST2_F(L0HwHelperTest, givenBitmaskWithAttentionBitsWith8EUSSWhenGettingThreadsThenSingleCorrectThreadReturned, PlatformsWithFusedEus) {
|
||||
// Work on a local copy of the default hardware info so the overrides below do not modify the shared default.
auto hwInfo = *NEO::defaultHwInfo.get();
|
||||
auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
// Override the topology to the 8-EU-per-subslice configuration under test.
hwInfo.gtSystemInfo.MaxEuPerSubSlice = 8;
|
||||
hwInfo.gtSystemInfo.MaxSubSlicesSupported = 64;
|
||||
hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = 32;
|
||||
|
||||
// Output parameters filled in by getAttentionBitmaskForSingleThreads(); reused across loop iterations.
std::unique_ptr<uint8_t[]> bitmask;
|
||||
size_t size = 0;
|
||||
|
||||
// Exercise subslice IDs 0 and 1.
for (uint32_t subsliceID = 0; subsliceID < 2; subsliceID++) {
|
||||
|
||||
std::vector<EuThread::ThreadId> threadsWithAtt;
|
||||
// NOTE(review): initializer order is presumably {tile, slice, subslice, eu, thread} - confirm against EuThread::ThreadId.
threadsWithAtt.push_back({0, 0, subsliceID, 0, 0});
|
||||
|
||||
// Encode the single thread into a bitmask ...
l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size);
|
||||
|
||||
// ... and decode it back for tile 0.
auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size);
|
||||
|
||||
// ASSERT (not EXPECT) so the indexed accesses below are not reached when the size is wrong.
ASSERT_EQ(2u, threads.size());
|
||||
|
||||
// First decoded entry: same subslice, EU 0.
EXPECT_EQ(0u, threads[0].slice);
|
||||
EXPECT_EQ(subsliceID, threads[0].subslice);
|
||||
EXPECT_EQ(0u, threads[0].eu);
|
||||
EXPECT_EQ(0u, threads[0].thread);
|
||||
EXPECT_EQ(0u, threads[0].tileIndex);
|
||||
|
||||
// Second decoded entry: same subslice, EU 4.
EXPECT_EQ(0u, threads[1].slice);
|
||||
EXPECT_EQ(subsliceID, threads[1].subslice);
|
||||
EXPECT_EQ(4u, threads[1].eu);
|
||||
EXPECT_EQ(0u, threads[1].thread);
|
||||
EXPECT_EQ(0u, threads[1].tileIndex);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(L0HwHelperFusedEuTest, givenDynamicallyPopulatesSliceInfoGreaterThanMaxSlicesSupportedThenBitmasksAreCorrect, PlatformsWithFusedEus) {
|
||||
auto hwInfo = *NEO::defaultHwInfo.get();
|
||||
auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
@@ -32,11 +32,10 @@ void DebugSession::createEuThreads() {
|
||||
bool isSubDevice = connectedDevice->getNEODevice()->isSubDevice();
|
||||
|
||||
auto &hwInfo = connectedDevice->getHwInfo();
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numSubslicesPerSlice = std::max(hwInfo.gtSystemInfo.MaxSubSlicesSupported, hwInfo.gtSystemInfo.MaxDualSubSlicesSupported) / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
uint32_t subDeviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices());
|
||||
|
||||
UNRECOVERABLE_IF(isSubDevice && subDeviceCount > 1);
|
||||
|
||||
for (uint32_t tileIndex = 0; tileIndex < subDeviceCount; tileIndex++) {
|
||||
@@ -139,6 +138,7 @@ ze_device_thread_t DebugSession::convertToApi(EuThread::ThreadId threadId) {
|
||||
}
|
||||
|
||||
std::vector<EuThread::ThreadId> DebugSession::getSingleThreadsForDevice(uint32_t deviceIndex, ze_device_thread_t physicalThread, const NEO::HardwareInfo &hwInfo) {
|
||||
|
||||
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
|
||||
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
|
||||
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
|
||||
|
||||
@@ -552,7 +552,6 @@ ze_result_t DebugSessionWindows::resumeImp(const std::vector<EuThread::ThreadId>
|
||||
std::unique_ptr<uint8_t[]> bitmask;
|
||||
size_t bitmaskSize = 0;
|
||||
l0HwHelper.getAttentionBitmaskForSingleThreads(threads, hwInfo, bitmask, bitmaskSize);
|
||||
applyResumeWa(bitmask.get(), bitmaskSize);
|
||||
printBitmask(bitmask.get(), bitmaskSize);
|
||||
|
||||
KM_ESCAPE_INFO escapeInfo = {0};
|
||||
|
||||
@@ -51,6 +51,28 @@ TEST(DebugSessionTest, givenNullDeviceWhenDebugSessionCreatedThenAllThreadsAreEm
|
||||
EXPECT_TRUE(sessionMock->allThreads.empty());
|
||||
}
|
||||
|
||||
// Checks applyResumeWa() on a 32-byte bitmask that has only byte 0 set to 1:
// when the L0 HW helper reports that the resume workaround is required,
// byte 4 is expected to become 1; otherwise byte 4 is expected to stay 0.
TEST(DebugSessionTest, givenApplyResumeWaCalledThenWAIsApplied) {
|
||||
zet_debug_config_t config = {};
|
||||
config.pid = 0x1234;
|
||||
// Local copy of the default hardware info, used to create the mock device and to pick the HW helper.
auto hwInfo = *NEO::defaultHwInfo.get();
|
||||
|
||||
// NOTE(review): raw pointer with no visible delete; presumably ownership passes to deviceImp - confirm.
NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0));
|
||||
Mock<L0::DeviceImp> deviceImp(neoDevice, neoDevice->getExecutionEnvironment());
|
||||
|
||||
auto sessionMock = std::make_unique<MockDebugSession>(config, &deviceImp);
|
||||
auto &l0HwHelper = L0HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
size_t bitmaskSize = 32;
|
||||
// make_unique<uint8_t[]> value-initializes the array, so every byte starts at 0.
auto bitmask = std::make_unique<uint8_t[]>(bitmaskSize);
|
||||
// Set only byte 0 before applying the workaround.
bitmask.get()[0] = 1;
|
||||
sessionMock->applyResumeWa(bitmask.get(), bitmaskSize);
|
||||
// The expected value of byte 4 depends on whether this platform requires the resume workaround.
if (l0HwHelper.isResumeWARequired()) {
|
||||
EXPECT_EQ(1, bitmask.get()[4]);
|
||||
} else {
|
||||
EXPECT_EQ(0, bitmask.get()[4]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(DebugSessionTest, givenAllStoppedThreadsWhenInterruptCalledThenErrorNotAvailableReturned) {
|
||||
zet_debug_config_t config = {};
|
||||
config.pid = 0x1234;
|
||||
|
||||
@@ -137,6 +137,7 @@ struct MockDebugSession : public L0::DebugSessionImp {
|
||||
using L0::DebugSession::allThreads;
|
||||
using L0::DebugSession::debugArea;
|
||||
|
||||
using L0::DebugSessionImp::applyResumeWa;
|
||||
using L0::DebugSessionImp::calculateThreadSlotOffset;
|
||||
using L0::DebugSessionImp::checkTriggerEventsForAttention;
|
||||
using L0::DebugSessionImp::fillResumeAndStoppedThreadsFromNewlyStopped;
|
||||
|
||||
@@ -1799,7 +1799,7 @@ TEST_F(DebugApiWindowsTest, GivenErrorCasesWhenResumeImpIsCalledThenErrorIsRetur
|
||||
EXPECT_EQ(2u, mockWddm->dbgUmdEscapeActionCalled[DBGUMD_ACTION_EU_CONTROL_CLR_ATT_BIT]);
|
||||
}
|
||||
|
||||
TEST_F(DebugApiWindowsTest, GivenResumeWARequiredWhenCallingResumeThenWaIsAppliedToBitmask) {
|
||||
TEST_F(DebugApiWindowsTest, GivenResumeImpCalledThenBitmaskIsCorrect) {
|
||||
auto session = std::make_unique<MockDebugSessionWindows>(zet_debug_config_t{0x1234}, device);
|
||||
ASSERT_NE(nullptr, session);
|
||||
|
||||
@@ -1821,29 +1821,7 @@ TEST_F(DebugApiWindowsTest, GivenResumeWARequiredWhenCallingResumeThenWaIsApplie
|
||||
|
||||
auto bitmask = mockWddm->euControlBitmask.get();
|
||||
EXPECT_EQ(1u, bitmask[0]);
|
||||
|
||||
auto &l0HwHelper = L0HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily);
|
||||
if (l0HwHelper.isResumeWARequired()) {
|
||||
EXPECT_EQ(1u, bitmask[4]);
|
||||
} else {
|
||||
EXPECT_EQ(0u, bitmask[4]);
|
||||
}
|
||||
|
||||
thread = {0, 0, 4, 0};
|
||||
session->allThreads[EuThread::ThreadId(0, thread)]->stopThread(1u);
|
||||
|
||||
result = session->resume(thread);
|
||||
EXPECT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
bitmask = mockWddm->euControlBitmask.get();
|
||||
|
||||
if (l0HwHelper.isResumeWARequired()) {
|
||||
EXPECT_EQ(1u, bitmask[0]);
|
||||
EXPECT_EQ(1u, bitmask[4]);
|
||||
} else {
|
||||
EXPECT_EQ(0u, bitmask[0]);
|
||||
EXPECT_EQ(1u, bitmask[4]);
|
||||
}
|
||||
EXPECT_EQ(0u, bitmask[4]);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
|
||||
Reference in New Issue
Block a user