[libc] Reland wchar string conversion mb to wc (#151048)

Added crash on nullptr to mbstowcs

---------

Co-authored-by: Sriya Pratipati <sriyap@google.com>
This commit is contained in:
sribee8
2025-07-29 16:34:10 +00:00
committed by GitHub
parent 335dbba741
commit a653934b58
15 changed files with 972 additions and 9 deletions

View File

@@ -1273,6 +1273,9 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.wchar.mbsinit
libc.src.wchar.mbrtowc
libc.src.wchar.mbtowc
libc.src.wchar.mbstowcs
libc.src.wchar.mbsrtowcs
libc.src.wchar.mbsnrtowcs
libc.src.wchar.wcrtomb
libc.src.wchar.wctomb
libc.src.wchar.wcstombs

View File

@@ -53,6 +53,33 @@ functions:
- type: wchar_t *__restrict
- type: const char *__restrict
- type: size_t
# Restartable multibyte-to-wide string conversion with both a byte limit and a
# wide-character limit (two size_t arguments) plus an mbstate_t.
# NOTE(review): mbsnrtowcs is specified by POSIX rather than ISO C; confirm
# that `stdc` is the intended standards tag for this entry.
- name: mbsnrtowcs
standards:
- stdc
return_type: size_t
arguments:
- type: wchar_t *__restrict
- type: const char **__restrict
- type: size_t
- type: size_t
- type: mbstate_t *__restrict
# Restartable multibyte-to-wide string conversion: destination buffer, address
# of the source pointer, a size_t limit, and an mbstate_t conversion state.
- name: mbsrtowcs
standards:
- stdc
return_type: size_t
arguments:
- type: wchar_t *__restrict
- type: const char **__restrict
- type: size_t
- type: mbstate_t *__restrict
# Non-restartable multibyte-to-wide string conversion: destination buffer,
# source string, and a size_t limit (no caller-visible conversion state).
- name: mbstowcs
standards:
- stdc
return_type: size_t
arguments:
- type: wchar_t *__restrict
- type: const char *__restrict
- type: size_t
- name: mbsinit
standards:
- stdc

View File

@@ -60,14 +60,31 @@ add_object_library(
SRCS
mbrtowc.cpp
DEPENDS
libc.hdr.errno_macros
libc.hdr.types.wchar_t
libc.hdr.types.size_t
libc.src.__support.common
libc.src.__support.error_or
libc.src.__support.macros.config
.character_converter
.mbstate
libc.hdr.errno_macros
libc.hdr.types.wchar_t
libc.hdr.types.size_t
libc.src.__support.common
libc.src.__support.error_or
libc.src.__support.macros.config
.character_converter
.mbstate
)
# Header-only internal implementation of mbsnrtowcs. The DEPENDS list mirrors
# the headers included by mbsnrtowcs.h (errno macros, size_t/wchar_t types,
# ErrorOr, null-check macro, and the character/string converters).
add_header_library(
mbsnrtowcs
HDRS
mbsnrtowcs.h
DEPENDS
libc.hdr.errno_macros
libc.hdr.types.wchar_t
libc.hdr.types.size_t
libc.src.__support.common
libc.src.__support.error_or
libc.src.__support.macros.config
libc.src.__support.macros.null_check
.character_converter
.mbstate
.string_converter
)
add_header_library(

View File

@@ -0,0 +1,66 @@
//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H
#include "hdr/errno_macros.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/null_check.h"
#include "src/__support/wchar/character_converter.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/string_converter.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
// Shared implementation behind the mbstowcs / mbsrtowcs / mbsnrtowcs
// entrypoints.
//
// Pops UTF-32 code points from a StringConverter built over `*src` (using the
// conversion state `ps` and the limits `len` and `nmc`) and, when `dst` is not
// null, stores each one into `dst`.
//
// Returns the number of wide characters produced, not counting a terminating
// null character, or an Error:
//   - EINVAL when the CharacterConverter reports `ps` as an invalid state;
//   - the converter's own error code for any failure other than -1.
//
// `*src` is only modified when `dst` is not null: it becomes nullptr once the
// null terminator has been converted, or is advanced by the converter's source
// index when the converter stops with -1 (the conversion limit was hit).
// `src` itself must not be null (LIBC_CRASH_ON_NULLPTR).
LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst,
                                              const char **__restrict src,
                                              size_t nmc, size_t len,
                                              mbstate *__restrict ps) {
  LIBC_CRASH_ON_NULLPTR(src);

  // Refuse to start from a conversion state the converter deems invalid.
  CharacterConverter state_probe(ps);
  if (!state_probe.isValidState())
    return Error(EINVAL);

  StringConverter<char8_t> converter(
      reinterpret_cast<const char8_t *>(*src), ps, len, nmc);
  const bool has_dst = dst != nullptr;
  size_t produced = 0;

  for (;;) {
    ErrorOr<char32_t> next = converter.popUTF32();

    if (!next.has_value()) {
      // Anything other than -1 is a genuine conversion failure.
      if (next.error() != -1)
        return Error(next.error());
      // -1: we hit the conversion limit; report how far we got.
      if (has_dst)
        *src += converter.getSourceIndex();
      return produced;
    }

    if (has_dst)
      dst[produced] = next.value();

    // The null terminator is stored but not counted in the return value.
    if (next.value() == L'\0') {
      if (has_dst)
        *src = nullptr;
      return produced;
    }

    ++produced;
  }
}
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS_H

View File

@@ -184,6 +184,55 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)
# Public mbstowcs entrypoint. It is a thin wrapper over the shared
# libc.src.__support.wchar.mbsnrtowcs implementation and uses the null-check
# macro on its source argument.
add_entrypoint_object(
mbstowcs
SRCS
mbstowcs.cpp
HDRS
mbstowcs.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.wchar_t
libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.macros.null_check
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
libc.src.__support.wchar.mbsnrtowcs
)
# Public mbsrtowcs entrypoint, implemented on top of the shared
# libc.src.__support.wchar.mbsnrtowcs helper.
add_entrypoint_object(
  mbsrtowcs
  SRCS
    mbsrtowcs.cpp
  HDRS
    mbsrtowcs.h
  DEPENDS
    # mbsrtowcs.h includes hdr/types/mbstate_t.h, so depend on it explicitly.
    libc.hdr.types.mbstate_t
    libc.hdr.types.size_t
    libc.hdr.types.wchar_t
    libc.src.__support.common
    libc.src.__support.macros.config
    libc.src.__support.libc_errno
    libc.src.__support.wchar.mbstate
    libc.src.__support.wchar.mbsnrtowcs
)
# Public mbsnrtowcs entrypoint, implemented on top of the shared
# libc.src.__support.wchar.mbsnrtowcs helper.
add_entrypoint_object(
  mbsnrtowcs
  SRCS
    mbsnrtowcs.cpp
  HDRS
    mbsnrtowcs.h
  DEPENDS
    # mbsnrtowcs.h includes hdr/types/mbstate_t.h, so depend on it explicitly.
    libc.hdr.types.mbstate_t
    libc.hdr.types.size_t
    libc.hdr.types.wchar_t
    libc.src.__support.common
    libc.src.__support.macros.config
    libc.src.__support.libc_errno
    libc.src.__support.wchar.mbstate
    libc.src.__support.wchar.mbsnrtowcs
)
add_entrypoint_object(
wcstombs
SRCS

View File

@@ -0,0 +1,39 @@
//===-- Implementation of mbsnrtowcs --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/wchar/mbsnrtowcs.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbsnrtowcs.h"
#include "src/__support/wchar/mbstate.h"
namespace LIBC_NAMESPACE_DECL {
// Public mbsnrtowcs entrypoint: forwards to internal::mbsnrtowcs and maps an
// internal error onto errno plus a -1 return value.
LLVM_LIBC_FUNCTION(size_t, mbsnrtowcs,
                   (wchar_t *__restrict dst, const char **__restrict src,
                    size_t nmc, size_t len, mbstate_t *__restrict ps)) {
  // Conversion state used when the caller does not supply one.
  static internal::mbstate internal_mbstate;
  internal::mbstate *state = &internal_mbstate;
  if (ps != nullptr)
    state = reinterpret_cast<internal::mbstate *>(ps);

  // With no destination buffer the output limit is ignored.
  if (dst == nullptr)
    len = SIZE_MAX;

  auto result = internal::mbsnrtowcs(dst, src, nmc, len, state);
  if (result.has_value())
    return result.value();

  // Encoding failure
  libc_errno = result.error();
  return -1;
}
} // namespace LIBC_NAMESPACE_DECL

View File

@@ -0,0 +1,24 @@
//===-- Implementation header for mbsnrtowcs ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
#define LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H
#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
// Converts the multibyte string at `*src` into wide characters stored in
// `dst`. When `dst` is null the implementation ignores `len`; when `ps` is
// null an internal static conversion state is used. On failure errno is set
// and -1 (converted to size_t) is returned; otherwise the return value is the
// number of wide characters produced, excluding a terminating null.
// NOTE(review): `nmc` appears to bound the source bytes consumed and `len` the
// wide characters written (see the unit tests) — confirm against
// StringConverter.
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
size_t nmc, size_t len, mbstate_t *__restrict ps);
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_WCHAR_MBSNRTOWCS_H

View File

@@ -0,0 +1,39 @@
//===-- Implementation of mbsrtowcs ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/wchar/mbsrtowcs.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbsnrtowcs.h"
#include "src/__support/wchar/mbstate.h"
namespace LIBC_NAMESPACE_DECL {
// Public mbsrtowcs entrypoint: the same as mbsnrtowcs but with the `nmc`
// argument of the shared helper fixed to SIZE_MAX.
LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
                   (wchar_t *__restrict dst, const char **__restrict src,
                    size_t len, mbstate_t *__restrict ps)) {
  // Conversion state used when the caller does not supply one.
  static internal::mbstate internal_mbstate;
  internal::mbstate *state = &internal_mbstate;
  if (ps != nullptr)
    state = reinterpret_cast<internal::mbstate *>(ps);

  // With no destination buffer the output limit is ignored.
  if (dst == nullptr)
    len = SIZE_MAX;

  auto result = internal::mbsnrtowcs(dst, src, SIZE_MAX, len, state);
  if (result.has_value())
    return result.value();

  // Encoding failure
  libc_errno = result.error();
  return -1;
}
} // namespace LIBC_NAMESPACE_DECL

View File

@@ -0,0 +1,24 @@
//===-- Implementation header for mbsrtowcs -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
// Converts the multibyte string at `*src` into wide characters stored in
// `dst`. When `dst` is null the implementation ignores `len`; when `ps` is
// null an internal static conversion state is used. On failure errno is set
// and -1 (converted to size_t) is returned; otherwise the return value is the
// number of wide characters produced, excluding a terminating null.
size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
size_t len, mbstate_t *__restrict ps);
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H

View File

@@ -0,0 +1,40 @@
//===-- Implementation of mbstowcs ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/wchar/mbstowcs.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/null_check.h"
#include "src/__support/wchar/mbsnrtowcs.h"
#include "src/__support/wchar/mbstate.h"
namespace LIBC_NAMESPACE_DECL {
// Public mbstowcs entrypoint: converts the multibyte string `s` through the
// shared internal::mbsnrtowcs helper. The caller's pointer is never modified
// because the helper only ever sees a local copy of it.
LLVM_LIBC_FUNCTION(size_t, mbstowcs,
                   (wchar_t *__restrict pwcs, const char *__restrict s,
                    size_t n)) {
  LIBC_CRASH_ON_NULLPTR(s);

  // With no destination buffer the output limit is ignored.
  if (pwcs == nullptr)
    n = SIZE_MAX;

  // This function has no state parameter, so it keeps its own.
  static internal::mbstate internal_mbstate;
  const char *cursor = s;
  auto result =
      internal::mbsnrtowcs(pwcs, &cursor, SIZE_MAX, n, &internal_mbstate);
  if (result.has_value())
    return result.value();

  // Encoding failure
  libc_errno = result.error();
  return -1;
}
} // namespace LIBC_NAMESPACE_DECL

22
libc/src/wchar/mbstowcs.h Normal file
View File

@@ -0,0 +1,22 @@
//===-- Implementation header for mbstowcs --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
#define LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
// Converts the multibyte string `s` into wide characters stored in `pwcs`.
// When `pwcs` is null the implementation ignores `n`. On failure errno is set
// and -1 (converted to size_t) is returned; otherwise the return value is the
// number of wide characters produced, excluding a terminating null.
size_t mbstowcs(wchar_t *__restrict pwcs, const char *__restrict s, size_t n);
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_WCHAR_MBSTOWCS_H

View File

@@ -75,6 +75,19 @@ add_libc_test(
libc.test.UnitTest.ErrnoCheckingTest
)
# Unit tests for the mbstowcs entrypoint.
add_libc_test(
  mbstowcs_test
  SUITE
    libc_wchar_unittests
  SRCS
    mbstowcs_test.cpp
  DEPENDS
    libc.src.__support.libc_errno
    # mbstowcs_test.cpp includes src/__support/macros/null_check.h.
    libc.src.__support.macros.null_check
    libc.src.wchar.mbstowcs
    libc.hdr.types.wchar_t
    libc.test.UnitTest.ErrnoCheckingTest
)
add_libc_test(
mblen_test
SUITE
@@ -87,6 +100,22 @@ add_libc_test(
libc.test.UnitTest.ErrnoCheckingTest
)
# Unit tests for the mbsrtowcs entrypoint.
add_libc_test(
  mbsrtowcs_test
  SUITE
    libc_wchar_unittests
  SRCS
    mbsrtowcs_test.cpp
  DEPENDS
    libc.src.__support.libc_errno
    # mbsrtowcs_test.cpp includes src/__support/macros/null_check.h.
    libc.src.__support.macros.null_check
    libc.src.__support.wchar.mbstate
    libc.src.string.memset
    libc.src.wchar.mbsrtowcs
    libc.hdr.types.mbstate_t
    libc.hdr.types.wchar_t
    libc.test.UnitTest.ErrnoCheckingTest
)
add_libc_test(
mbrlen_test
SUITE
@@ -97,7 +126,23 @@ add_libc_test(
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
libc.src.string.memset
libc.src.wchar.mbrlen
libc.src.wchar.mbsrlen
libc.hdr.types.mbstate_t
libc.hdr.types.wchar_t
libc.test.UnitTest.ErrnoCheckingTest
)
add_libc_test(
mbsnrtowcs_test
SUITE
libc_wchar_unittests
SRCS
mbsnrtowcs_test.cpp
DEPENDS
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
libc.src.string.memset
libc.src.wchar.mbsnrtowcs
libc.hdr.types.mbstate_t
libc.hdr.types.wchar_t
libc.test.UnitTest.ErrnoCheckingTest

View File

@@ -0,0 +1,212 @@
//===-- Unittests for mbsnrtowcs ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "hdr/types/mbstate_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/null_check.h"
#include "src/__support/wchar/mbstate.h"
#include "src/string/memset.h"
#include "src/wchar/mbsnrtowcs.h"
#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"
using LlvmLibcMBSNRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
// A one-byte character is converted on the first call; the second call
// consumes the null terminator and resets the source pointer to nullptr.
TEST_F(LlvmLibcMBSNRToWCSTest, OneByteOneChar) {
  const char *cursor = "A";
  const char *start = cursor;
  wchar_t out[2];
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(state));

  size_t count = LIBC_NAMESPACE::mbsnrtowcs(out, &cursor, 1, 1, &state);
  ASSERT_EQ(static_cast<char>(out[0]), 'A');
  ASSERT_EQ(static_cast<int>(count), 1);
  // The source pointer now rests on the null terminator.
  ASSERT_EQ(cursor, start + 1);
  ASSERT_ERRNO_SUCCESS();

  count = LIBC_NAMESPACE::mbsnrtowcs(out + 1, &cursor, 1, 1, &state);
  ASSERT_EQ(static_cast<char>(out[1]), '\0');
  // The terminator is written but not counted.
  ASSERT_EQ(static_cast<int>(count), 0);
  // Reaching the terminator clears the source pointer.
  ASSERT_EQ(cursor, nullptr);
  ASSERT_ERRNO_SUCCESS();
}
// A four-byte character split across two calls: the partial sequence is
// carried in the mbstate_t between them.
TEST_F(LlvmLibcMBSNRToWCSTest, FourByteOneChar) {
  const char *cursor = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
  const char *start = cursor;
  wchar_t out[2];
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(state));

  // Three bytes are one short of the complete character.
  size_t count = LIBC_NAMESPACE::mbsnrtowcs(out, &cursor, 3, 2, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 0);
  ASSERT_EQ(cursor, start + 3);

  // Two more bytes: the last byte of the emoji plus the null terminator.
  count = LIBC_NAMESPACE::mbsnrtowcs(out, &cursor, 2, 2, &state);
  ASSERT_ERRNO_SUCCESS();
  // The terminator is not counted.
  ASSERT_EQ(static_cast<int>(count), 1);
  ASSERT_EQ(cursor, nullptr);
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_TRUE(out[1] == L'\0');
}
// Walks a string of 1-, 2-, 3- and 4-byte characters one character at a time,
// including a 3-byte character that is deliberately fed in two pieces.
TEST_F(LlvmLibcMBSNRToWCSTest, MixedNumberOfBytes) {
  // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹'
  const char *cursor = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[5];
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(state));

  // 'A': one byte.
  size_t count = LIBC_NAMESPACE::mbsnrtowcs(out, &cursor, 1, 1, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 1);
  ASSERT_EQ(static_cast<char>(out[0]), 'A');
  ASSERT_EQ(cursor, start + 1);

  // 'Σ': two bytes.
  count = LIBC_NAMESPACE::mbsnrtowcs(out + 1, &cursor, 2, 1, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 1);
  ASSERT_EQ(static_cast<int>(out[1]), 931);
  ASSERT_EQ(cursor, start + 3);

  // '♻': three bytes, supplied as two bytes and then the remaining one.
  count = LIBC_NAMESPACE::mbsnrtowcs(out + 2, &cursor, 2, 5, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 0);
  ASSERT_EQ(cursor, start + 5);
  count = LIBC_NAMESPACE::mbsnrtowcs(out + 2, &cursor, 1, 1, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 1);
  ASSERT_EQ(static_cast<int>(out[2]), 9851);
  ASSERT_EQ(cursor, start + 6);

  // '😹': four bytes.
  count = LIBC_NAMESPACE::mbsnrtowcs(out + 3, &cursor, 4, 5, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 1);
  ASSERT_EQ(static_cast<int>(out[3]), 128569);
  ASSERT_EQ(cursor, start + 10);

  // Only the terminator is left; this call passes no state object.
  count = LIBC_NAMESPACE::mbsnrtowcs(out + 4, &cursor, 4, 4, nullptr);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_TRUE(out[4] == L'\0');
  // The terminator is not counted.
  ASSERT_EQ(static_cast<int>(count), 0);
  // Reaching the terminator clears the source pointer.
  ASSERT_EQ(cursor, nullptr);
}
// The wide-character limit stops the conversion before the end of the string
// and leaves the rest of the destination buffer untouched.
TEST_F(LlvmLibcMBSNRToWCSTest, ReadLessThanStringLength) {
  // Four laughing cat emojis "😹😹😹😹"
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[5] = {L'a', L'b', L'c', L'd', L'e'};

  size_t count = LIBC_NAMESPACE::mbsnrtowcs(out, &cursor, 100, 3, nullptr);
  ASSERT_ERRNO_SUCCESS();
  // Exactly three emojis were converted...
  ASSERT_EQ(static_cast<int>(count), 3);
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_EQ(static_cast<int>(out[1]), 128569);
  ASSERT_EQ(static_cast<int>(out[2]), 128569);
  // ...and the remaining slots keep their initial contents.
  ASSERT_TRUE(out[3] == L'd');
  ASSERT_TRUE(out[4] == L'e');
  // Three four-byte characters were consumed: 12 bytes.
  ASSERT_EQ(cursor, start + 12);
}
// An invalid lead byte fails immediately with EILSEQ and leaves the source
// pointer where it was.
TEST_F(LlvmLibcMBSNRToWCSTest, InvalidFirstByte) {
  // 0x80 cannot start a multibyte character.
  const char *cursor =
      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[3];

  size_t count = LIBC_NAMESPACE::mbsnrtowcs(out, &cursor, 88, 88, nullptr);
  // Error return value plus errno.
  ASSERT_EQ(static_cast<int>(count), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
  // The source pointer is not advanced on failure.
  ASSERT_EQ(cursor, start);
}
// An invalid continuation byte is only detected by the call that reaches it;
// the earlier, byte-limited call succeeds.
TEST_F(LlvmLibcMBSNRToWCSTest, InvalidMiddleByte) {
  // The 7th byte is invalid for a 4 byte character
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[3];
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(state));

  // Five bytes: one full character plus the lead byte of the next one.
  size_t count = LIBC_NAMESPACE::mbsnrtowcs(out, &cursor, 5, 88, &state);
  ASSERT_EQ(static_cast<int>(count), 1);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(cursor, start + 5);
  ASSERT_EQ(static_cast<int>(out[0]), 128569);

  // Continuing runs into the bad byte: error, errno set, pointer unchanged.
  count = LIBC_NAMESPACE::mbsnrtowcs(out + 1, &cursor, 5, 88, &state);
  ASSERT_EQ(static_cast<int>(count), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
  ASSERT_EQ(cursor, start + 5);
}
// With a null destination the call only counts characters.
TEST_F(LlvmLibcMBSNRToWCSTest, NullDestination) {
  // Four laughing cat emojis "😹😹😹😹"
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;

  size_t count = LIBC_NAMESPACE::mbsnrtowcs(nullptr, &cursor, 88, 88, nullptr);
  ASSERT_ERRNO_SUCCESS();
  // len is ignored for a null destination; the whole string is counted.
  ASSERT_EQ(static_cast<int>(count), 4);
  // The source pointer must be left alone as well.
  ASSERT_EQ(cursor, start);
}
// errno stays clear while valid characters are converted and is set only by
// the call that reaches the malformed character.
TEST_F(LlvmLibcMBSNRToWCSTest, ErrnoChecks) {
  // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it)
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[5];
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(state));

  // The first two characters are valid, so errno must not be set.
  size_t count = LIBC_NAMESPACE::mbsnrtowcs(out, &cursor, 80, 2, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 2);
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_EQ(static_cast<int>(out[1]), 128569);
  ASSERT_EQ(cursor, start + 8);

  // The third character is malformed: errno is set and -1 returned.
  count = LIBC_NAMESPACE::mbsnrtowcs(out + 2, &cursor, 4, 2, &state);
  ASSERT_ERRNO_EQ(EILSEQ);
  ASSERT_EQ(static_cast<int>(count), -1);
  // The source pointer does not move on failure.
  ASSERT_EQ(cursor, start + 8);
}
#if defined(LIBC_ADD_NULL_CHECKS)
// Death test for the null check on the `src` argument; the surrounding #if
// only builds it when LIBC_ADD_NULL_CHECKS is defined.
TEST(LlvmLibcMBSNRToWCSTest, NullptrCrash) {
// Passing in a nullptr should crash the program.
// NOTE(review): WITH_SIGNAL(-1) presumably accepts any terminating signal —
// confirm against the test framework.
EXPECT_DEATH(
[] { LIBC_NAMESPACE::mbsnrtowcs(nullptr, nullptr, 1, 1, nullptr); },
WITH_SIGNAL(-1));
}
#endif // LIBC_ADD_NULL_CHECKS

View File

@@ -0,0 +1,185 @@
//===-- Unittests for mbsrtowcs -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "hdr/types/mbstate_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/null_check.h"
#include "src/__support/wchar/mbstate.h"
#include "src/string/memset.h"
#include "src/wchar/mbsrtowcs.h"
#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"
using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
// A one-byte character is converted on the first call; the second call
// consumes the null terminator and resets the source pointer to nullptr.
TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneChar) {
  const char *cursor = "A";
  const char *start = cursor;
  wchar_t out[2];
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(state));

  size_t count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 1, &state);
  ASSERT_EQ(static_cast<char>(out[0]), 'A');
  ASSERT_EQ(static_cast<int>(count), 1);
  // The source pointer now rests on the null terminator.
  ASSERT_EQ(cursor, start + 1);
  ASSERT_ERRNO_SUCCESS();

  count = LIBC_NAMESPACE::mbsrtowcs(out + 1, &cursor, 1, &state);
  ASSERT_EQ(static_cast<char>(out[1]), '\0');
  // The terminator is written but not counted.
  ASSERT_EQ(static_cast<int>(count), 0);
  // Reaching the terminator clears the source pointer.
  ASSERT_EQ(cursor, nullptr);
  ASSERT_ERRNO_SUCCESS();
}
// A single four-byte character followed by the terminator, in one call.
TEST_F(LlvmLibcMBSRToWCSTest, FourByteOneChar) {
  const char *cursor = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
  wchar_t out[2];
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(state));

  size_t count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 2, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_TRUE(out[1] == L'\0');
  // The terminator is not counted.
  ASSERT_EQ(static_cast<int>(count), 1);
  // Reaching the terminator clears the source pointer.
  ASSERT_EQ(cursor, nullptr);
}
// Two four-byte characters followed by the terminator, in one call.
TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) {
  // Two laughing cat emojis "😹😹"
  const char *cursor = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t out[3];
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(state));

  size_t count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 3, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_EQ(static_cast<int>(out[1]), 128569);
  ASSERT_TRUE(out[2] == L'\0');
  // The terminator is not counted.
  ASSERT_EQ(static_cast<int>(count), 2);
  // Reaching the terminator clears the source pointer.
  ASSERT_EQ(cursor, nullptr);
}
// Converts characters of every UTF-8 length in one call, then picks up the
// terminator with a second call.
TEST_F(LlvmLibcMBSRToWCSTest, MixedNumberOfBytes) {
  // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹'
  const char *cursor = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[5];

  size_t count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 4, nullptr);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<char>(out[0]), 'A');
  ASSERT_EQ(static_cast<int>(out[1]), 931);
  ASSERT_EQ(static_cast<int>(out[2]), 9851);
  ASSERT_EQ(static_cast<int>(out[3]), 128569);
  // The source pointer rests on the null terminator (byte index 10).
  ASSERT_EQ(cursor, start + 10);
  ASSERT_EQ(static_cast<int>(count), 4);

  count = LIBC_NAMESPACE::mbsrtowcs(out + 4, &cursor, 4, nullptr);
  ASSERT_TRUE(out[4] == L'\0');
  ASSERT_ERRNO_SUCCESS();
  // The terminator is not counted.
  ASSERT_EQ(static_cast<int>(count), 0);
  // Reaching the terminator clears the source pointer.
  ASSERT_EQ(cursor, nullptr);
}
// The wide-character limit stops the conversion before the end of the string
// and leaves the rest of the destination buffer untouched.
TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) {
  // Four laughing cat emojis "😹😹😹😹"
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[5] = {L'a', L'b', L'c', L'd', L'e'};

  size_t count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 3, nullptr);
  ASSERT_ERRNO_SUCCESS();
  // Exactly three emojis were converted...
  ASSERT_EQ(static_cast<int>(count), 3);
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_EQ(static_cast<int>(out[1]), 128569);
  ASSERT_EQ(static_cast<int>(out[2]), 128569);
  // ...and the remaining slots keep their initial contents.
  ASSERT_TRUE(out[3] == L'd');
  ASSERT_TRUE(out[4] == L'e');
  // Three four-byte characters were consumed: 12 bytes.
  ASSERT_EQ(cursor, start + 12);
}
// An invalid lead byte fails immediately with EILSEQ and leaves the source
// pointer where it was.
TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) {
  // 0x80 cannot start a multibyte character.
  const char *cursor =
      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[3];

  size_t count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 3, nullptr);
  // Error return value plus errno.
  ASSERT_EQ(static_cast<int>(count), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
  // The source pointer is not advanced on failure.
  ASSERT_EQ(cursor, start);
}
// A malformed continuation byte inside the second character fails the whole
// call, even though the first character was valid.
TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) {
  // The 7th byte is invalid for a 4 byte character
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[3];

  size_t count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 5, nullptr);
  // Error return value, errno set, source pointer untouched.
  ASSERT_EQ(static_cast<int>(count), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
  ASSERT_EQ(cursor, start);
}
// With a null destination the call only counts characters.
TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) {
  // Four laughing cat emojis "😹😹😹😹"
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;

  size_t count = LIBC_NAMESPACE::mbsrtowcs(nullptr, &cursor, 2, nullptr);
  ASSERT_ERRNO_SUCCESS();
  // len is ignored for a null destination; the whole string is counted.
  ASSERT_EQ(static_cast<int>(count), 4);
  // The source pointer must be left alone as well.
  ASSERT_EQ(cursor, start);
}
// errno stays clear while valid characters are converted and is set only by
// the call that reaches the malformed character.
TEST_F(LlvmLibcMBSRToWCSTest, ErrnoChecks) {
  // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it)
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t out[5];

  // The first two characters are valid, so errno must not be set.
  size_t count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 2, nullptr);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 2);
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_EQ(static_cast<int>(out[1]), 128569);
  ASSERT_EQ(cursor, start + 8);

  // The third character is malformed: errno is set and -1 returned.
  count = LIBC_NAMESPACE::mbsrtowcs(out, &cursor, 2, nullptr);
  ASSERT_ERRNO_EQ(EILSEQ);
  ASSERT_EQ(static_cast<int>(count), -1);
  // The source pointer does not move on failure.
  ASSERT_EQ(cursor, start + 8);
}
#if defined(LIBC_ADD_NULL_CHECKS)
// Death test for the null check on the `src` argument; the surrounding #if
// only builds it when LIBC_ADD_NULL_CHECKS is defined.
TEST(LlvmLibcMBSRToWCSTest, NullptrCrash) {
// Passing in a nullptr should crash the program.
// NOTE(review): WITH_SIGNAL(-1) presumably accepts any terminating signal —
// confirm against the test framework.
EXPECT_DEATH([] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); },
WITH_SIGNAL(-1));
}
#endif // LIBC_ADD_NULL_CHECKS

View File

@@ -0,0 +1,171 @@
//===-- Unittests for mbstowcs --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "hdr/types/wchar_t.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/null_check.h"
#include "src/wchar/mbstowcs.h"
#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"
using LlvmLibcMBSToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
// Converts a one-byte character, then converts just the terminator by passing
// a pointer one past it. The caller's pointer is passed by value throughout.
TEST_F(LlvmLibcMBSToWCSTest, OneByteOneChar) {
  const char *input = "A";
  const char *start = input;
  wchar_t out[2];

  size_t count = LIBC_NAMESPACE::mbstowcs(out, input, 1);
  ASSERT_EQ(static_cast<char>(out[0]), 'A');
  ASSERT_EQ(static_cast<int>(count), 1);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
  ASSERT_ERRNO_SUCCESS();

  count = LIBC_NAMESPACE::mbstowcs(out + 1, input + 1, 1);
  ASSERT_EQ(static_cast<char>(out[1]), '\0');
  // The terminator is written but not counted.
  ASSERT_EQ(static_cast<int>(count), 0);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
  ASSERT_ERRNO_SUCCESS();
}
// A single four-byte character followed by the terminator.
TEST_F(LlvmLibcMBSToWCSTest, FourByteOneChar) {
  const char *input = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
  const char *start = input;
  wchar_t out[2];

  size_t count = LIBC_NAMESPACE::mbstowcs(out, input, 2);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_TRUE(out[1] == L'\0');
  // The terminator is not counted.
  ASSERT_EQ(static_cast<int>(count), 1);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
}
// Two four-byte characters followed by the terminator.
TEST_F(LlvmLibcMBSToWCSTest, MultiByteTwoCharacters) {
  // Two laughing cat emojis "😹😹"
  const char *input = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = input;
  wchar_t out[3];

  size_t count = LIBC_NAMESPACE::mbstowcs(out, input, 3);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_EQ(static_cast<int>(out[1]), 128569);
  ASSERT_TRUE(out[2] == L'\0');
  // The terminator is not counted.
  ASSERT_EQ(static_cast<int>(count), 2);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
}
// Characters of every UTF-8 length plus the terminator in a single call.
TEST_F(LlvmLibcMBSToWCSTest, MixedNumberOfBytes) {
  // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹'
  const char *input = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
  const char *start = input;
  wchar_t out[5];

  size_t count = LIBC_NAMESPACE::mbstowcs(out, input, 5);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<char>(out[0]), 'A');
  ASSERT_EQ(static_cast<int>(out[1]), 931);
  ASSERT_EQ(static_cast<int>(out[2]), 9851);
  ASSERT_EQ(static_cast<int>(out[3]), 128569);
  ASSERT_TRUE(out[4] == L'\0');
  // The terminator is not counted.
  ASSERT_EQ(static_cast<int>(count), 4);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
}
// The wide-character limit stops the conversion before the end of the string
// and leaves the rest of the destination buffer untouched.
TEST_F(LlvmLibcMBSToWCSTest, ReadLessThanStringLength) {
  // Four laughing cat emojis "😹😹😹😹"
  const char *input =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = input;
  wchar_t out[5] = {L'a', L'b', L'c', L'd', L'e'};

  size_t count = LIBC_NAMESPACE::mbstowcs(out, input, 3);
  ASSERT_ERRNO_SUCCESS();
  // Exactly three emojis were converted...
  ASSERT_EQ(static_cast<int>(count), 3);
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_EQ(static_cast<int>(out[1]), 128569);
  ASSERT_EQ(static_cast<int>(out[2]), 128569);
  // ...and the remaining slots keep their initial contents.
  ASSERT_TRUE(out[3] == L'd');
  ASSERT_TRUE(out[4] == L'e');
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
}
// An invalid lead byte fails immediately with EILSEQ.
TEST_F(LlvmLibcMBSToWCSTest, InvalidFirstByte) {
  // 0x80 cannot start a multibyte character.
  const char *input =
      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t out[3];

  size_t count = LIBC_NAMESPACE::mbstowcs(out, input, 3);
  // Error return value plus errno.
  ASSERT_EQ(static_cast<int>(count), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
// A malformed continuation byte inside the second character fails the whole
// call, even though the first character was valid.
TEST_F(LlvmLibcMBSToWCSTest, InvalidMiddleByte) {
  // The 7th byte is invalid for a 4 byte character
  const char *input =
      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = input;
  wchar_t out[3];

  size_t count = LIBC_NAMESPACE::mbstowcs(out, input, 5);
  // Error return value plus errno.
  ASSERT_EQ(static_cast<int>(count), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
}
// With a null destination the call only counts characters.
TEST_F(LlvmLibcMBSToWCSTest, NullDestination) {
  // Four laughing cat emojis "😹😹😹😹"
  const char *input =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = input;

  size_t count = LIBC_NAMESPACE::mbstowcs(nullptr, input, 2);
  ASSERT_ERRNO_SUCCESS();
  // n is ignored for a null destination; the whole string is counted.
  ASSERT_EQ(static_cast<int>(count), 4);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
}
// errno stays clear for a conversion that stops before the malformed
// character and is set by a call that starts on an invalid byte.
TEST_F(LlvmLibcMBSToWCSTest, ErrnoChecks) {
  // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it)
  const char *input =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9";
  const char *start = input;
  wchar_t out[5];

  // The first two characters are valid, so errno must not be set.
  size_t count = LIBC_NAMESPACE::mbstowcs(out, input, 2);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(count), 2);
  ASSERT_EQ(static_cast<int>(out[0]), 128569);
  ASSERT_EQ(static_cast<int>(out[1]), 128569);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);

  // Starting two bytes into the string must fail and set errno.
  count = LIBC_NAMESPACE::mbstowcs(out, input + 2, 2);
  ASSERT_ERRNO_EQ(EILSEQ);
  ASSERT_EQ(static_cast<int>(count), -1);
  // The caller's pointer is unchanged.
  ASSERT_EQ(input, start);
}
#if defined(LIBC_ADD_NULL_CHECKS)
// Death test for the null check on the source string argument; the
// surrounding #if only builds it when LIBC_ADD_NULL_CHECKS is defined.
TEST(LlvmLibcMBSToWCSTest, NullptrCrash) {
// Passing in a nullptr should crash the program.
// NOTE(review): WITH_SIGNAL(-1) presumably accepts any terminating signal —
// confirm against the test framework.
EXPECT_DEATH([] { LIBC_NAMESPACE::mbstowcs(nullptr, nullptr, 1); },
WITH_SIGNAL(-1));
}
#endif // LIBC_ADD_NULL_CHECKS