From 704f8e1cc0f80a28c6ced57b0d8063984ca09341 Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Mon, 5 Jan 2026 07:24:48 +0900
Subject: [PATCH 1/2] [C++] Add test for UTF-8 filenames on Windows

---
 cpp/src/arrow/io/file_test.cc | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc
index 81ae716ef67..0ba596d9c73 100644
--- a/cpp/src/arrow/io/file_test.cc
+++ b/cpp/src/arrow/io/file_test.cc
@@ -115,7 +115,25 @@ TEST_F(TestFileOutputStream, FileNameWideCharConversionRangeException) {
   ASSERT_RAISES(Invalid, ReadableFile::Open(file_name));
 }
 
-// TODO add a test with a valid utf-8 filename
+TEST_F(TestFileOutputStream, FileNameValidUtf8) {
+  // Test that file operations work with valid UTF-8 filenames.
+  // On Windows, PlatformFilename::FromString() converts UTF-8 strings to wide strings.
+  // On Unix, filenames are treated as opaque byte strings.
+  std::string utf8_file_name = "test_file_한국어_😀.txt";
+  std::string utf8_path = TempFile(utf8_file_name);
+
+  ASSERT_OK_AND_ASSIGN(auto file, FileOutputStream::Open(utf8_path));
+  const char* data = "UTF-8 test data";
+  ASSERT_OK(file->Write(data, strlen(data)));
+  ASSERT_OK(file->Close());
+
+  // Verify we can read it back
+  ASSERT_OK_AND_ASSIGN(auto readable_file, ReadableFile::Open(utf8_path));
+  ASSERT_OK_AND_ASSIGN(auto buffer, readable_file->ReadAt(0, strlen(data)));
+  ASSERT_EQ(std::string(reinterpret_cast<const char*>(buffer->data()), buffer->size()),
+            std::string(data));
+  ASSERT_OK(readable_file->Close());
+}
 #endif
 
 TEST_F(TestFileOutputStream, DestructorClosesFile) {

From c6261508a66080e6df03cb5eafa3284808f5c771 Mon Sep 17 00:00:00 2001
From: Hyukjin Kwon
Date: Wed, 7 Jan 2026 10:28:23 +0900
Subject: [PATCH 2/2] review comment

---
 cpp/src/arrow/io/file_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc
index 0ba596d9c73..17d8e5f3727 100644
--- a/cpp/src/arrow/io/file_test.cc
+++ b/cpp/src/arrow/io/file_test.cc
@@ -116,14 +116,14 @@ TEST_F(TestFileOutputStream, FileNameWideCharConversionRangeException) {
 }
 
 TEST_F(TestFileOutputStream, FileNameValidUtf8) {
-  // Test that file operations work with valid UTF-8 filenames.
+  // Test that file operations work with UTF-8 filenames (Korean + emoji).
   // On Windows, PlatformFilename::FromString() converts UTF-8 strings to wide strings.
   // On Unix, filenames are treated as opaque byte strings.
   std::string utf8_file_name = "test_file_한국어_😀.txt";
   std::string utf8_path = TempFile(utf8_file_name);
 
   ASSERT_OK_AND_ASSIGN(auto file, FileOutputStream::Open(utf8_path));
-  const char* data = "UTF-8 test data";
+  const char* data = "test content";
   ASSERT_OK(file->Write(data, strlen(data)));
   ASSERT_OK(file->Close());
 