Skip to content

[libc] Character converter skeleton class #143619

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 11, 2025
22 changes: 22 additions & 0 deletions libc/hdr/types/char32_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Definition of char32_t.h ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Include guard for the char32_t type proxy header.
#ifndef LLVM_LIBC_HDR_TYPES_CHAR32_T_H
#define LLVM_LIBC_HDR_TYPES_CHAR32_T_H

// Pick the header this proxy forwards to, depending on the build mode.
#ifdef LIBC_FULL_BUILD

// Full build: forward to the type header under include/llvm-libc-types.
#include "include/llvm-libc-types/char32_t.h"

#else // overlay mode

// Overlay mode: forward to the wrapper around the system <uchar.h>.
#include "hdr/uchar_overlay.h"

#endif // LIBC_FULL_BUILD

#endif // LLVM_LIBC_HDR_TYPES_CHAR32_T_H
22 changes: 22 additions & 0 deletions libc/hdr/types/char8_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Definition of char8_t.h -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Include guard for the char8_t type proxy header.
#ifndef LLVM_LIBC_HDR_TYPES_CHAR8_T_H
#define LLVM_LIBC_HDR_TYPES_CHAR8_T_H

// Pick the header this proxy forwards to, depending on the build mode.
#ifdef LIBC_FULL_BUILD

// Full build: forward to the type header under include/llvm-libc-types.
#include "include/llvm-libc-types/char8_t.h"

#else // overlay mode

// Overlay mode: forward to the wrapper around the system <uchar.h>.
#include "hdr/uchar_overlay.h"

#endif // LIBC_FULL_BUILD

#endif // LLVM_LIBC_HDR_TYPES_CHAR8_T_H
69 changes: 69 additions & 0 deletions libc/hdr/uchar_overlay.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//===-- Including uchar.h in overlay mode ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_HDR_UCHAR_OVERLAY_H
#define LLVM_LIBC_HDR_UCHAR_OVERLAY_H

// Reject use from a full build: this wrapper pulls in the system <uchar.h>.
#ifdef LIBC_FULL_BUILD
#error "This header should only be included in overlay mode"
#endif

// Overlay mode

// The glibc <uchar.h> header might provide extern inline definitions for a few
// functions, causing external alias errors. They are guarded by the
// `__USE_EXTERN_INLINES` macro. We temporarily disable `__USE_EXTERN_INLINES`
// by defining `__NO_INLINE__` before including <uchar.h>.
// Likewise, `__USE_FORTIFY_LEVEL` is temporarily forced to 0 and
// `_FORTIFY_SOURCE` is temporarily undefined.
//
// NOTE(review): a macro's replacement list is expanded when the macro is used,
// not when it is defined. The "save" macros below (LIBC_OLD_FORTIFY_SOURCE and
// LIBC_OLD_USE_FORTIFY_LEVEL) are #undef'ed right after the "restore" #define,
// so afterwards `_FORTIFY_SOURCE` / `__USE_FORTIFY_LEVEL` expand to a bare,
// undefined LIBC_OLD_* identifier rather than to their original values.
// Confirm that this is intended; `#pragma push_macro` / `#pragma pop_macro`
// would preserve the original values.

// Step 1: remember that _FORTIFY_SOURCE was set, then remove it.
#ifdef _FORTIFY_SOURCE
#define LIBC_OLD_FORTIFY_SOURCE _FORTIFY_SOURCE
#undef _FORTIFY_SOURCE
#endif

// Step 2: define __NO_INLINE__ if it is not already defined.
// LIBC_SET_NO_INLINE records that this header defined it, so only our own
// definition is removed again after the include.
#ifndef __NO_INLINE__
#define __NO_INLINE__ 1
#define LIBC_SET_NO_INLINE
#endif

// Step 3: remember that __USE_EXTERN_INLINES was defined, then remove it.
#ifdef __USE_EXTERN_INLINES
#define LIBC_OLD_USE_EXTERN_INLINES
#undef __USE_EXTERN_INLINES
#endif

// Step 4: if __USE_FORTIFY_LEVEL is already defined, force it to 0 for the
// duration of the include.
#ifdef __USE_FORTIFY_LEVEL
#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL
#undef __USE_FORTIFY_LEVEL
#define __USE_FORTIFY_LEVEL 0
#endif

#include <uchar.h>

// Undo step 1: re-define _FORTIFY_SOURCE (see the NOTE(review) above about
// the value it ends up with).
#ifdef LIBC_OLD_FORTIFY_SOURCE
#define _FORTIFY_SOURCE LIBC_OLD_FORTIFY_SOURCE
#undef LIBC_OLD_FORTIFY_SOURCE
#endif

// Undo step 2: drop __NO_INLINE__ only if this header was the one to define it.
#ifdef LIBC_SET_NO_INLINE
#undef __NO_INLINE__
#undef LIBC_SET_NO_INLINE
#endif

// Undo step 4: replace the temporary 0 (see the NOTE(review) above about the
// value it ends up with).
#ifdef LIBC_OLD_USE_FORTIFY_LEVEL
#undef __USE_FORTIFY_LEVEL
#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL
#undef LIBC_OLD_USE_FORTIFY_LEVEL
#endif

// Undo step 3: re-define __USE_EXTERN_INLINES if it was defined on entry.
#ifdef LIBC_OLD_USE_EXTERN_INLINES
#define __USE_EXTERN_INLINES
#undef LIBC_OLD_USE_EXTERN_INLINES
#endif

#endif // LLVM_LIBC_HDR_UCHAR_OVERLAY_H
26 changes: 26 additions & 0 deletions libc/src/__support/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Header-only target for mbstate.h. The header includes hdr/types/char32_t.h,
# hence the dependency on the char32_t type target.
add_header_library(
mbstate
HDRS
mbstate.h
DEPENDS
libc.hdr.types.char32_t
)

# Object library for the CharacterConverter class (character_converter.h/.cpp).
# Its dependencies mirror the headers included by character_converter.h: the
# char8_t/char32_t type headers plus the sibling mbstate and utf_ret targets.
# NOTE(review): `.utf_ret` is declared further down in this file; confirm the
# libc CMake rules accept a dependency that is defined after its first use.
add_object_library(
character_converter
HDRS
character_converter.h
SRCS
character_converter.cpp
DEPENDS
libc.hdr.types.char8_t
libc.hdr.types.char32_t
.mbstate
.utf_ret
)

# Header-only target for utf_ret.h; it declares no dependencies.
add_header_library(
utf_ret
HDRS
utf_ret.h
)
32 changes: 32 additions & 0 deletions libc/src/__support/wchar/character_converter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//===-- Implementation of a class for conversion --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "hdr/types/char32_t.h"
#include "hdr/types/char8_t.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/utf_ret.h"

#include "character_converter.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Binds the converter to the caller-provided state object. Only the pointer is
// stored; the pointee is neither read nor modified here.
CharacterConverter::CharacterConverter(mbstate_t *mbstate) : state(mbstate) {}

// The member functions below are skeleton stubs with no implementation yet.
// An empty body in a function with a non-void return type is undefined
// behaviour as soon as the function is called (control flows off the end
// without returning a value), so each stub traps explicitly instead. This makes
// an accidental call fail loudly and deterministically until the real
// implementation lands.

// TODO: implement. Currently traps when called.
bool CharacterConverter::isComplete() { __builtin_trap(); }

// TODO: implement. Currently traps when called.
int CharacterConverter::push(char8_t utf8_byte) {
  (void)utf8_byte; // Unused until the stub is implemented.
  __builtin_trap();
}

// TODO: implement. Currently traps when called.
int CharacterConverter::push(char32_t utf32) {
  (void)utf32; // Unused until the stub is implemented.
  __builtin_trap();
}

// TODO: implement. Currently traps when called.
utf_ret<char8_t> CharacterConverter::pop_utf8() { __builtin_trap(); }

// TODO: implement. Currently traps when called.
utf_ret<char32_t> CharacterConverter::pop_utf32() { __builtin_trap(); }

} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
39 changes: 39 additions & 0 deletions libc/src/__support/wchar/character_converter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//===-- Definition of a class for mbstate_t and conversion -----*-- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
#define LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H

#include "hdr/types/char32_t.h"
#include "hdr/types/char8_t.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/utf_ret.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Skeleton interface for character conversion backed by an externally supplied
// state object. Only the declarations are visible here; judging by the member
// names it is meant to accept UTF-8 bytes / UTF-32 values via push() and hand
// back converted units via pop_utf8() / pop_utf32() -- confirm against the
// implementation once it exists.
//
// NOTE(review): the state type is spelled `mbstate_t`, while
// src/__support/wchar/mbstate.h (included above) declares `internal::mbstate`.
// Confirm which type is intended.
class CharacterConverter {
private:
  // Conversion state supplied by the caller; this class stores only the
  // pointer.
  mbstate_t *state;

public:
  // Binds the converter to `mbstate`. Marked explicit so that an `mbstate_t *`
  // is never silently converted into a CharacterConverter.
  explicit CharacterConverter(mbstate_t *mbstate);

  // Presumably reports whether a full character has been assembled -- TODO
  // confirm once implemented.
  bool isComplete();

  // Feed one input unit into the converter. The meaning of the int result is
  // not established by the visible code.
  int push(char8_t utf8_byte);
  int push(char32_t utf32);

  // Retrieve one output unit together with an `error` field (see utf_ret).
  utf_ret<char8_t> pop_utf8();
  utf_ret<char32_t> pop_utf32();
};

} // namespace internal
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
27 changes: 27 additions & 0 deletions libc/src/__support/wchar/mbstate.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//===-- Definition of mbstate-----------------------------------*-- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
#define LLVM_LIBC_SRC___SUPPORT_MBSTATE_H

#include "hdr/types/char32_t.h"
#include <stdint.h>

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Plain state record for a conversion in progress. The code that reads and
// writes these fields is not visible here, so the field descriptions below are
// inferred from the names only -- TODO confirm against the implementation.
struct mbstate {
// Presumably the partially assembled character value.
char32_t partial;
// Presumably how much of the current character has been consumed so far.
// NOTE(review): the name counts bits while `total_bytes` counts bytes; confirm
// the intended unit.
uint8_t bits_processed;
// Presumably the expected length, in bytes, of the current character.
uint8_t total_bytes;
};

} // namespace internal
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
21 changes: 21 additions & 0 deletions libc/src/__support/wchar/utf_ret.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Definition of utf_ret ----------------------------------*-- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
#define LLVM_LIBC_SRC___SUPPORT_UTF_RET_H

namespace LIBC_NAMESPACE_DECL {

// Result record pairing a value of type T with an integer status field.
// Kept as a plain aggregate, so it can be brace-initialised in
// (out, error) order.
template <class T>
struct utf_ret {
  T out;     // The produced value.
  int error; // Status reported alongside `out`; its meaning is set by the producer.
};

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
Loading