Skip to content

Commit 402184b

Browse files
committed
add to_utf8 and to_utf32
1 parent e9ea386 commit 402184b

File tree

3 files changed

+76
-0
lines changed

3 files changed

+76
-0
lines changed

include/gf2/core/StringUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,9 @@ namespace gf {
138138
return { string };
139139
}
140140

141+
// Convert a UTF-8 encoded string into a UTF-32 string holding the decoded code points.
GF_CORE_API std::u32string to_utf32(std::string_view utf8);
142+
// Encode a UTF-32 string as UTF-8; every code point below 0x110000 becomes 1 to 4 bytes.
GF_CORE_API std::string to_utf8(std::u32string_view utf32);
143+
141144
}
142145

143146
#endif // GF_STRING_UTILS_H

library/core/StringUtils.cc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,4 +127,37 @@ namespace gf {
127127
return result;
128128
}
129129

130+
std::u32string to_utf32(std::string_view utf8)
131+
{
132+
CodepointRange range = codepoints(utf8);
133+
return { range.begin(), range.end() };
134+
}
135+
136+
// Encode a UTF-32 string as UTF-8.
//
// Each Unicode scalar value is written as 1 to 4 bytes:
//   U+0000..U+007F     -> 1 byte
//   U+0080..U+07FF     -> 2 bytes
//   U+0800..U+FFFF     -> 3 bytes
//   U+10000..U+10FFFF  -> 4 bytes
//
// Surrogate code points (U+D800..U+DFFF) and values above U+10FFFF have no
// well-formed UTF-8 encoding. Passing one is a programming error: it triggers
// an assertion in debug builds. When assertions are compiled out, the value is
// replaced by U+FFFD (REPLACEMENT CHARACTER) instead of being silently dropped
// or encoded as an ill-formed sequence.
std::string to_utf8(std::u32string_view utf32)
{
  std::string utf8;
  // Every code point produces at least one byte, so this is a cheap lower bound.
  utf8.reserve(utf32.size());

  for (char32_t c : utf32) {
    const bool is_surrogate = (c >= 0xD800 && c <= 0xDFFF);

    if (is_surrogate || c > 0x10FFFF) {
      assert(false && "to_utf8: not a Unicode scalar value");
      c = 0xFFFD; // only reached when assertions are disabled
    }

    if (c < 0x80) {
      utf8.push_back(static_cast<char>(c & 0x7F));
    } else if (c < 0x800) {
      utf8.push_back(static_cast<char>(((c >> 6) & 0x1F) | 0xC0));
      utf8.push_back(static_cast<char>(((c >> 0) & 0x3F) | 0x80));
    } else if (c < 0x10000) {
      utf8.push_back(static_cast<char>(((c >> 12) & 0x0F) | 0xE0));
      utf8.push_back(static_cast<char>(((c >> 6) & 0x3F) | 0x80));
      utf8.push_back(static_cast<char>(((c >> 0) & 0x3F) | 0x80));
    } else {
      // c <= 0x10FFFF is guaranteed by the validation above.
      utf8.push_back(static_cast<char>(((c >> 18) & 0x07) | 0xF0));
      utf8.push_back(static_cast<char>(((c >> 12) & 0x3F) | 0x80));
      utf8.push_back(static_cast<char>(((c >> 6) & 0x3F) | 0x80));
      utf8.push_back(static_cast<char>(((c >> 0) & 0x3F) | 0x80));
    }
  }

  return utf8;
}
162+
130163
}

tests/tests_core_StringUtils.cc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,43 @@ TEST(StringUtilsTest, Codepoints) {
123123
EXPECT_EQ(*cp.begin(), U'\U0001F3AE');
124124
}
125125
}
126+
127+
// Checks gf::to_utf32 on empty, ASCII, 2/3-byte, CJK and 4-byte (emoji) inputs.
TEST(StringUtilsTest, ToUtf32) {
  // Empty input yields an empty result.
  const std::u32string empty = gf::to_utf32("");
  EXPECT_EQ(empty.size(), 0u);
  EXPECT_EQ(empty, U"");

  // Plain ASCII: one code point per byte.
  const std::u32string ascii = gf::to_utf32("abc");
  EXPECT_EQ(ascii.size(), 3u);
  EXPECT_EQ(ascii, U"abc");

  // Accented letters and the euro sign.
  const std::u32string accented = gf::to_utf32("éçù€");
  EXPECT_EQ(accented.size(), 4u);
  EXPECT_EQ(accented, U"éçù€");

  // CJK characters.
  const std::u32string cjk = gf::to_utf32("東京");
  EXPECT_EQ(cjk.size(), 2u);
  EXPECT_EQ(cjk, U"東京");

  // A single emoji is a single code point.
  const std::u32string emoji = gf::to_utf32("🎮");
  EXPECT_EQ(emoji.size(), 1u);
  EXPECT_EQ(emoji, U"🎮");
}
148+
149+
150+
// Checks gf::to_utf8 on empty, ASCII, accented, CJK and emoji inputs.
TEST(StringUtilsTest, ToUtf8) {
  // Empty input yields an empty result.
  const std::string empty = gf::to_utf8(U"");
  EXPECT_EQ(empty, "");

  // Plain ASCII.
  const std::string ascii = gf::to_utf8(U"abc");
  EXPECT_EQ(ascii, "abc");

  // Accented letters and the euro sign.
  const std::string accented = gf::to_utf8(U"éçù€");
  EXPECT_EQ(accented, "éçù€");

  // CJK characters.
  const std::string cjk = gf::to_utf8(U"東京");
  EXPECT_EQ(cjk, "東京");

  // Emoji.
  const std::string emoji = gf::to_utf8(U"🎮");
  EXPECT_EQ(emoji, "🎮");
}

0 commit comments

Comments
 (0)