mirror of
https://github.com/intel/llvm.git
synced 2026-01-13 19:08:21 +08:00
[LIT] Work around the 60-process limit on Windows (#157759)
Python multiprocessing is limited to 60 workers at most:
6bc65c30ff/Lib/concurrent/futures/process.py (L669-L672)
Since the limit applies per pool, we can work around it by using multiple
pools on Windows when more workers than the limit are requested.
This commit is contained in:
@@ -7,6 +7,14 @@ import lit.Test
|
||||
import lit.util
|
||||
import lit.worker
|
||||
|
||||
# Windows has a limit of 60 workers per pool.
|
||||
# This is defined in the multiprocessing module implementation.
|
||||
# See: https://github.com/python/cpython/blob/6bc65c30ff1fd0b581a2c93416496fc720bc442c/Lib/concurrent/futures/process.py#L669-L672
|
||||
WINDOWS_MAX_WORKERS_PER_POOL = 60
|
||||
|
||||
|
||||
def _ceilDiv(a, b):
|
||||
return (a + b - 1) // b
|
||||
|
||||
class MaxFailuresError(Exception):
    """Raised to abort the run once the configured failure limit is hit."""

    pass
|
||||
@@ -72,25 +80,65 @@ class Run(object):
|
||||
if v is not None
|
||||
}
|
||||
|
||||
pool = multiprocessing.Pool(
|
||||
self.workers, lit.worker.initialize, (self.lit_config, semaphores)
|
||||
# Windows has a limit of 60 workers per pool, so we need to use multiple pools
|
||||
# if we have more workers requested than the limit.
|
||||
# Also, allow to override the limit with the LIT_WINDOWS_MAX_WORKERS_PER_POOL environment variable.
|
||||
max_workers_per_pool = (
|
||||
WINDOWS_MAX_WORKERS_PER_POOL if os.name == "nt" else self.workers
|
||||
)
|
||||
max_workers_per_pool = int(
|
||||
os.getenv("LIT_WINDOWS_MAX_WORKERS_PER_POOL", max_workers_per_pool)
|
||||
)
|
||||
|
||||
async_results = [
|
||||
pool.apply_async(
|
||||
lit.worker.execute, args=[test], callback=self.progress_callback
|
||||
num_pools = max(1, _ceilDiv(self.workers, max_workers_per_pool))
|
||||
|
||||
# Distribute self.workers across num_pools as evenly as possible
|
||||
workers_per_pool_list = [self.workers // num_pools] * num_pools
|
||||
for pool_idx in range(self.workers % num_pools):
|
||||
workers_per_pool_list[pool_idx] += 1
|
||||
|
||||
if num_pools > 1:
|
||||
self.lit_config.note(
|
||||
"Using %d pools balancing %d workers total distributed as %s (Windows worker limit workaround)"
|
||||
% (num_pools, self.workers, workers_per_pool_list)
|
||||
)
|
||||
for test in self.tests
|
||||
]
|
||||
pool.close()
|
||||
|
||||
# Create multiple pools
|
||||
pools = []
|
||||
for pool_size in workers_per_pool_list:
|
||||
pool = multiprocessing.Pool(
|
||||
pool_size, lit.worker.initialize, (self.lit_config, semaphores)
|
||||
)
|
||||
pools.append(pool)
|
||||
|
||||
# Distribute tests across pools
|
||||
tests_per_pool = _ceilDiv(len(self.tests), num_pools)
|
||||
async_results = []
|
||||
|
||||
for pool_idx, pool in enumerate(pools):
|
||||
start_idx = pool_idx * tests_per_pool
|
||||
end_idx = min(start_idx + tests_per_pool, len(self.tests))
|
||||
for test in self.tests[start_idx:end_idx]:
|
||||
ar = pool.apply_async(
|
||||
lit.worker.execute, args=[test], callback=self.progress_callback
|
||||
)
|
||||
async_results.append(ar)
|
||||
|
||||
# Close all pools
|
||||
for pool in pools:
|
||||
pool.close()
|
||||
|
||||
try:
|
||||
self._wait_for(async_results, deadline)
|
||||
except:
|
||||
pool.terminate()
|
||||
# Terminate all pools on exception
|
||||
for pool in pools:
|
||||
pool.terminate()
|
||||
raise
|
||||
finally:
|
||||
pool.join()
|
||||
# Join all pools
|
||||
for pool in pools:
|
||||
pool.join()
|
||||
|
||||
def _wait_for(self, async_results, deadline):
|
||||
timeout = deadline - time.time()
|
||||
|
||||
@@ -114,11 +114,6 @@ def usable_core_count():
|
||||
except AttributeError:
|
||||
n = os.cpu_count() or 1
|
||||
|
||||
# On Windows with more than 60 processes, multiprocessing's call to
|
||||
# _winapi.WaitForMultipleObjects() prints an error and lit hangs.
|
||||
if platform.system() == "Windows":
|
||||
return min(n, 60)
|
||||
|
||||
return n
|
||||
|
||||
def abs_path_preserve_drive(path):
|
||||
|
||||
27
llvm/utils/lit/tests/windows-pools.py
Normal file
27
llvm/utils/lit/tests/windows-pools.py
Normal file
@@ -0,0 +1,27 @@
|
||||
# Create a directory with 20 files and check the number of pools and workers per pool that lit will use.
|
||||
|
||||
# RUN: rm -Rf %t.dir && mkdir -p %t.dir
|
||||
# RUN: python -c "for i in range(20): open(rf'%t.dir/file{i}.txt', 'w').write('RUN:')"
|
||||
|
||||
# RUN: echo "import lit.formats" > %t.dir/lit.cfg
|
||||
# RUN: echo "config.name = \"top-level-suite\"" >> %t.dir/lit.cfg
|
||||
# RUN: echo "config.suffixes = [\".txt\"]" >> %t.dir/lit.cfg
|
||||
# RUN: echo "config.test_format = lit.formats.ShTest()" >> %t.dir/lit.cfg
|
||||
|
||||
|
||||
# 15 workers per pool max, 100 workers total max: we expect lit to cap the workers to the number of files
|
||||
# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=15" %{lit} -s %t.dir/ -j100 > %t.out 2>&1
|
||||
# CHECK: Using 2 pools balancing 20 workers total distributed as [10, 10]
|
||||
# CHECK: Passed: 20
|
||||
|
||||
# 5 workers per pool max, 17 workers total max
|
||||
# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=5" %{lit} -s %t.dir/ -j17 >> %t.out 2>&1
|
||||
# CHECK: Using 4 pools balancing 17 workers total distributed as [5, 4, 4, 4]
|
||||
# CHECK: Passed: 20
|
||||
|
||||
# 19 workers per pool max, 19 workers total max
|
||||
# RUN: env "LIT_WINDOWS_MAX_WORKERS_PER_POOL=19" %{lit} -s %t.dir/ -j19 >> %t.out 2>&1
|
||||
# CHECK-NOT: workers total distributed as
|
||||
# CHECK: Passed: 20
|
||||
|
||||
# RUN: cat %t.out | FileCheck %s
|
||||
Reference in New Issue
Block a user