mtest: fix unencodable XML chars
Replace unencodable XML chars with their printable representation so that xmllint can parse test outputs without error. Closes #9894 Co-authored-by: Tristan Partin <tristan@partin.io>
This commit is contained in:
parent
61984bcfa3
commit
bd3d2cf918
|
@ -72,6 +72,26 @@ GNU_ERROR_RETURNCODE = 99
|
|||
# Exit if 3 Ctrl-C's are received within one second
MAX_CTRLC = 3

# Inclusive (low, high) code point ranges treated as unencodable in XML.
# Matching characters get replaced with their printable representation.
# Tab (0x09), LF (0x0A), CR (0x0D) and 0x85 are deliberately left out.
UNENCODABLE_XML_UNICHRS: T.List[T.Tuple[int, int]] = [
    (0x00, 0x08),
    (0x0B, 0x0C),
    (0x0E, 0x1F),
    (0x7F, 0x84),
    (0x86, 0x9F),
    (0xFDD0, 0xFDEF),
    (0xFFFE, 0xFFFF),
]
# Not narrow build: also cover the last two code points of each of the
# planes 0x01..0x10, i.e. (0x1FFFE, 0x1FFFF) up to (0x10FFFE, 0x10FFFF)
if sys.maxunicode >= 0x10000:
    UNENCODABLE_XML_UNICHRS += [
        ((plane << 16) | 0xFFFE, (plane << 16) | 0xFFFF)
        for plane in range(0x01, 0x11)
    ]
# One "low-high" character-class fragment per range, then a single regex
# whose only group captures one unencodable character at a time
UNENCODABLE_XML_CHR_RANGES = [
    fr'{chr(low)}-{chr(high)}' for low, high in UNENCODABLE_XML_UNICHRS
]
UNENCODABLE_XML_CHRS_RE = re.compile('([' + ''.join(UNENCODABLE_XML_CHR_RANGES) + '])')
|
||||
|
||||
|
||||
def is_windows() -> bool:
    """Return True when ``platform.system()`` reports 'windows' (any letter case)."""
    return platform.system().lower() == 'windows'
|
||||
|
@ -1148,14 +1168,21 @@ class TestRunRust(TestRun):
|
|||
|
||||
TestRun.PROTOCOL_TO_CLASS[TestProtocol.RUST] = TestRunRust
|
||||
|
||||
def replace_unencodable_xml_chars(original_str: str) -> str:
    """Replace unencodable XML characters with their printable representation.

    Every character matched by ``UNENCODABLE_XML_CHRS_RE`` is substituted by
    the text ``repr()`` produces for it, without the surrounding quotes.
    Characters that do not match are returned unchanged.

    :param original_str: text that may contain unencodable characters
    :return: the text with each matched character replaced
    """
    def printable(match: 'T.Match[str]') -> str:
        # repr() wraps the character in `'`; [1:-1] is needed for removing
        # those quotes from both start and end of the string
        return repr(match.group())[1:-1]

    return UNENCODABLE_XML_CHRS_RE.sub(printable, original_str)
|
||||
|
||||
def decode(stream: T.Union[None, bytes]) -> str:
    """Decode raw bytes into text with unencodable XML characters replaced.

    UTF-8 is tried first; if the bytes are not valid UTF-8 they are decoded
    as ISO-8859-1 instead. The decoded text is always passed through
    ``replace_unencodable_xml_chars`` before being returned.

    :param stream: the bytes to decode, or None
    :return: '' when ``stream`` is None, otherwise the sanitized text
    """
    if stream is None:
        return ''
    try:
        text = stream.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to ISO-8859-1
        text = stream.decode('iso-8859-1', errors='ignore')
    # Single exit so that both decoding paths are sanitized
    return replace_unencodable_xml_chars(text)
|
||||
|
||||
async def read_decode(reader: asyncio.StreamReader,
|
||||
queue: T.Optional['asyncio.Queue[T.Optional[str]]'],
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
# Project with a single test named 'main' that runs script.py
project('replace unencodable xml chars')

script = find_program('script.py')
test('main', script)
|
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/env python3

# Prints one line of ordinary text followed by lines made of characters that
# are treated as unencodable in XML.

import sys

# Print base string(\nHello Meson\n) to see valid chars are not replaced
print('\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n')
# Print invalid input from all known unencodable chars
print(
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
    '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f')

# Cover for potential encoding issues: stdout may use an encoding that
# cannot represent these characters, in which case the line is skipped
try:
    print(
        '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
        '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
        '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
        '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
        '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
        '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff')
except UnicodeEncodeError:
    pass

# Cover for potential encoding issues (same reasoning as above)
try:
    if sys.maxunicode >= 0x10000:
        print(
            '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
            '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
            '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
            '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
            '\U0009fffe\U0009ffff\U000afffe\U000affff'
            '\U000bfffe\U000bffff\U000cfffe\U000cffff'
            '\U000dfffe\U000dffff\U000efffe\U000effff'
            '\U000ffffe\U000fffff\U0010fffe\U0010ffff')
except UnicodeEncodeError:
    pass
|
|
@ -59,6 +59,7 @@ from mesonbuild.linkers import linkers
|
|||
|
||||
from mesonbuild.dependencies.pkgconfig import PkgConfigDependency
|
||||
from mesonbuild.build import Target, ConfigurationData, Executable, SharedLibrary, StaticLibrary
|
||||
from mesonbuild import mtest
|
||||
import mesonbuild.modules.pkgconfig
|
||||
from mesonbuild.scripts import destdir_join
|
||||
|
||||
|
@ -398,6 +399,56 @@ class AllPlatformTests(BasePlatformTests):
|
|||
self.assertTrue(compdb[3]['file'].endswith("libfile4.c"))
|
||||
# FIXME: We don't have access to the linker command
|
||||
|
||||
def test_replace_unencodable_xml_chars(self):
    '''
    Check that mtest.decode() replaces unencodable xml chars with their
    printable representation and leaves ordinary text untouched
    https://github.com/mesonbuild/meson/issues/9894
    '''
    # The same ordinary text (\nHello Meson\n), written with and without
    # escapes, surrounds the payload to see valid chars are not replaced
    padding_escaped = '\n\x48\x65\x6c\x6c\x6f\x20\x4d\x65\x73\x6f\x6e\n'
    padding_plain = '\nHello Meson\n'
    # Collect all known unencodable chars, one chunk per group
    chunks = [
        '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c\x0e\x0f\x10\x11'
        '\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f'
        '\x80\x81\x82\x83\x84\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f'
        '\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e'
        '\x9f\ufdd0\ufdd1\ufdd2\ufdd3\ufdd4\ufdd5\ufdd6\ufdd7\ufdd8'
        '\ufdd9\ufdda\ufddb\ufddc\ufddd\ufdde\ufddf\ufde0\ufde1'
        '\ufde2\ufde3\ufde4\ufde5\ufde6\ufde7\ufde8\ufde9\ufdea'
        '\ufdeb\ufdec\ufded\ufdee\ufdef\ufffe\uffff',
    ]
    if sys.maxunicode >= 0x10000:
        chunks.append(
            '\U0001fffe\U0001ffff\U0002fffe\U0002ffff'
            '\U0003fffe\U0003ffff\U0004fffe\U0004ffff'
            '\U0005fffe\U0005ffff\U0006fffe\U0006ffff'
            '\U0007fffe\U0007ffff\U0008fffe\U0008ffff'
            '\U0009fffe\U0009ffff\U000afffe\U000affff'
            '\U000bfffe\U000bffff\U000cfffe\U000cffff'
            '\U000dfffe\U000dffff\U000efffe\U000effff'
            '\U000ffffe\U000fffff\U0010fffe\U0010ffff')
    payload = ''.join(chunks)

    # repr() adds surrounding quotes; [1:-1] strips them again
    expected = padding_plain + repr(payload)[1:-1] + padding_plain
    raw_stream = (padding_escaped + payload + padding_escaped).encode()
    self.assertEqual(mtest.decode(raw_stream), expected)
|
||||
|
||||
def test_replace_unencodable_xml_chars_unit(self):
    '''
    Run the '110 replace unencodable xml chars' unit test project and
    check that xmllint accepts the resulting testlog.junit.xml
    https://github.com/mesonbuild/meson/issues/9894
    '''
    xmllint_path = shutil.which('xmllint')
    if not xmllint_path:
        raise SkipTest('xmllint not installed')
    self.init(os.path.join(self.unit_test_dir, '110 replace unencodable xml chars'))
    self.run_tests()
    junit_log = Path(self.logdir) / 'testlog.junit.xml'
    # check=True makes a non-zero xmllint exit status raise
    subprocess.run(['xmllint', junit_log], check=True)
|
||||
|
||||
def test_run_target_files_path(self):
|
||||
'''
|
||||
Test that run_targets are run from the correct directory
|
||||
|
|
Loading…
Reference in New Issue