From 3724807eed888e522a2e51e404bf5767ba013ea2 Mon Sep 17 00:00:00 2001 From: "Yates, Brandon" Date: Thu, 20 Oct 2022 18:08:43 +0000 Subject: [PATCH] L0 debug - Fix thread creation for windows DSS Signed-off-by: Yates, Brandon --- .../hw_helpers/l0_hw_helper_tgllp_plus.inl | 13 ++++- .../sources/helper/l0_hw_helper_tests.cpp | 52 +++++++++++++++++++ .../tools/source/debug/debug_session_imp.cpp | 9 ++-- .../sources/debug/debug_session_tests.cpp | 37 +++++++++++++ 4 files changed, 106 insertions(+), 5 deletions(-) diff --git a/level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl b/level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl index 87172fee0f..7e54a548fc 100644 --- a/level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl +++ b/level_zero/core/source/hw_helpers/l0_hw_helper_tgllp_plus.inl @@ -7,7 +7,11 @@ template <> void L0HwHelperHw::getAttentionBitmaskForSingleThreads(const std::vector &threads, const NEO::HardwareInfo &hwInfo, std::unique_ptr &bitmask, size_t &bitmaskSize) const { - const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + const bool useDSS = (hwInfo.gtSystemInfo.MaxDualSubSlicesSupported > 0) && + (hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxDualSubSlicesSupported) == 2 + ? true + : false; + const uint32_t numSubslicesPerSlice = (useDSS ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported) / hwInfo.gtSystemInfo.MaxSlicesSupported; const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); @@ -36,7 +40,12 @@ void L0HwHelperHw::getAttentionBitmaskForSingleThreads(const std::vector template <> std::vector L0HwHelperHw::getThreadsFromAttentionBitmask(const NEO::HardwareInfo &hwInfo, uint32_t tile, const uint8_t *bitmask, const size_t bitmaskSize) const { - const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + + const bool useDSS = (hwInfo.gtSystemInfo.MaxDualSubSlicesSupported > 0) && + (hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxDualSubSlicesSupported) == 2 + ? true + : false; + const uint32_t numSubslicesPerSlice = (useDSS ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported) / hwInfo.gtSystemInfo.MaxSlicesSupported; const uint32_t numEuPerSubslice = std::min(hwInfo.gtSystemInfo.MaxEuPerSubSlice, 8u); const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); const uint32_t bytesPerEu = alignUp(numThreadsPerEu, 8) / 8; diff --git a/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp b/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp index e0c9bda1ae..2dc398d77c 100644 --- a/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp +++ b/level_zero/core/test/unit_tests/sources/helper/l0_hw_helper_tests.cpp @@ -377,6 +377,58 @@ HWTEST_F(L0HwHelperTest, givenBitmaskWithAttentionBitsForHalfOfThreadsWhenGettin using PlatformsWithFusedEus = IsWithinGfxCore; using L0HwHelperFusedEuTest = ::testing::Test; +HWTEST2_F(L0HwHelperTest, givenBitmaskWithAttentionBitsWithDSSWhenGettingThreadsThenSingleCorrectThreadReturned, PlatformsWithFusedEus) { + auto hwInfo = *NEO::defaultHwInfo.get(); + auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); + hwInfo.gtSystemInfo.MaxEuPerSubSlice = 8; + hwInfo.gtSystemInfo.MaxSubSlicesSupported = 64; + hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = 32; + + std::unique_ptr bitmask; + size_t size = 0; + + uint32_t subsliceID = 3; + + std::vector threadsWithAtt; + threadsWithAtt.push_back({0, 0, subsliceID, 8, 0}); + + l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); + + auto threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size); + + ASSERT_EQ(2u, threads.size()); + + EXPECT_EQ(0u, threads[0].slice); + EXPECT_EQ(subsliceID, threads[0].subslice); + EXPECT_EQ(8u, threads[0].eu); + EXPECT_EQ(0u, threads[0].thread); + EXPECT_EQ(0u, threads[0].tileIndex); + + EXPECT_EQ(0u, threads[1].slice); + EXPECT_EQ(subsliceID, threads[1].subslice); + EXPECT_EQ(12u, threads[1].eu); + EXPECT_EQ(0u, threads[1].thread); + EXPECT_EQ(0u, threads[1].tileIndex); + + hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = 0; + l0HwHelper.getAttentionBitmaskForSingleThreads(threadsWithAtt, hwInfo, bitmask, size); + + threads = l0HwHelper.getThreadsFromAttentionBitmask(hwInfo, 0, bitmask.get(), size); + + ASSERT_EQ(2u, threads.size()); + EXPECT_EQ(0u, threads[0].slice); + EXPECT_EQ(subsliceID, threads[0].subslice); + EXPECT_EQ(8u, threads[0].eu); + EXPECT_EQ(0u, threads[0].thread); + EXPECT_EQ(0u, threads[0].tileIndex); + + EXPECT_EQ(0u, threads[1].slice); + EXPECT_EQ(subsliceID, threads[1].subslice); + EXPECT_EQ(12u, threads[1].eu); + EXPECT_EQ(0u, threads[1].thread); + EXPECT_EQ(0u, threads[1].tileIndex); +} + HWTEST2_F(L0HwHelperFusedEuTest, givenDynamicallyPopulatesSliceInfoGreaterThanMaxSlicesSupportedThenBitmasksAreCorrect, PlatformsWithFusedEus) { auto hwInfo = *NEO::defaultHwInfo.get(); auto &l0HwHelper = L0::L0HwHelper::get(hwInfo.platform.eRenderCoreFamily); diff --git a/level_zero/tools/source/debug/debug_session_imp.cpp b/level_zero/tools/source/debug/debug_session_imp.cpp index 1824bf0b4a..5924782552 100644 --- a/level_zero/tools/source/debug/debug_session_imp.cpp +++ b/level_zero/tools/source/debug/debug_session_imp.cpp @@ -31,13 +31,16 @@ void DebugSession::createEuThreads() { bool isSubDevice = connectedDevice->getNEODevice()->isSubDevice(); auto &hwInfo = connectedDevice->getHwInfo(); - const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; - const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice; + const bool useDSS = (hwInfo.gtSystemInfo.MaxDualSubSlicesSupported > 0) && + (hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxDualSubSlicesSupported) == 2 + ? true + : false; + const uint32_t numSubslicesPerSlice = (useDSS ? hwInfo.gtSystemInfo.MaxDualSubSlicesSupported : hwInfo.gtSystemInfo.MaxSubSlicesSupported) / hwInfo.gtSystemInfo.MaxSlicesSupported; + const uint32_t numEuPerSubslice = useDSS ? hwInfo.gtSystemInfo.MaxEuPerSubSlice * 2 : hwInfo.gtSystemInfo.MaxEuPerSubSlice; const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount); uint32_t subDeviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices()); UNRECOVERABLE_IF(isSubDevice && subDeviceCount > 1); - for (uint32_t tileIndex = 0; tileIndex < subDeviceCount; tileIndex++) { if (isSubDevice || subDeviceCount == 1) { diff --git a/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp b/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp index a83d3c5353..5c5df7bc86 100644 --- a/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp +++ b/level_zero/tools/test/unit_tests/sources/debug/debug_session_tests.cpp @@ -363,6 +363,43 @@ struct MockDebugSession : public L0::DebugSessionImp { using DebugSessionTest = ::testing::Test; +TEST(DeviceWithDebugSessionTest, GivenDSSWhenCreatingThreadsThenAllThreadsHasCorrectValuesMapped) { + + auto hwInfo = *NEO::defaultHwInfo; + hwInfo.gtSystemInfo.MaxEuPerSubSlice = 8; + hwInfo.gtSystemInfo.MaxSubSlicesSupported = 64; + hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = 32; + NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); + Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); + + auto sessionMock = std::make_unique(zet_debug_config_t{0x1234}, &deviceImp); + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxDualSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + + EXPECT_EQ(1u, sessionMock->allThreads.count(EuThread::ThreadId(0, 0, numSubslicesPerSlice - 1, 0, 0))); + + EXPECT_EQ(0u, sessionMock->allThreads.count(EuThread::ThreadId(0, 0, numSubslicesPerSlice + 1, 0, 0))); + + EXPECT_EQ(1u, sessionMock->allThreads.count(EuThread::ThreadId(0, 0, numSubslicesPerSlice - 1, 7, 0))); + + EXPECT_EQ(1u, sessionMock->allThreads.count(EuThread::ThreadId(0, 0, numSubslicesPerSlice - 1, 15, 0))); +} + +TEST(DeviceWithDebugSessionTest, GivenMaxDualSubslicesIsZeroWhenCreatingThreadsThenAllThreadsIsCorrectSize) { + + auto hwInfo = *NEO::defaultHwInfo; + hwInfo.gtSystemInfo.MaxEuPerSubSlice = 8; + hwInfo.gtSystemInfo.MaxSubSlicesSupported = 64; + hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = 0; + NEO::MockDevice *neoDevice(NEO::MockDevice::createWithNewExecutionEnvironment(&hwInfo, 0)); + Mock deviceImp(neoDevice, neoDevice->getExecutionEnvironment()); + + auto sessionMock = std::make_unique(zet_debug_config_t{0x1234}, &deviceImp); + const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + + EXPECT_EQ(1u, sessionMock->allThreads.count(EuThread::ThreadId(0, 0, numSubslicesPerSlice - 1, 0, 0))); + EXPECT_EQ(0u, sessionMock->allThreads.count(EuThread::ThreadId(0, 0, numSubslicesPerSlice + 1, 0, 0))); +} + TEST(DeviceWithDebugSessionTest, GivenDeviceWithDebugSessionWhenCallingReleaseResourcesThenCloseConnectionIsCalled) { ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr driverHandle(new DriverHandleImp);