Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .build/build
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ cd "$(dirname "$0")"/..

mkdir -p build
cd build
cmake ..
cmake -DSTRING_ENCODING_TYPE="$ENCODING_TYPE" ..
cmake --build .
11 changes: 11 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,22 @@ on:
jobs:
linux:
runs-on: ubuntu-latest
strategy:
matrix:
encoding:
- ICONV
- ICU
env:
ENCODING_TYPE: ${{matrix.encoding}}
steps:
- uses: actions/checkout@v3
- name: restore
run: |
sudo apt-get install -y libgtest-dev
- name: restore ICU
run: |
sudo apt-get install -y libicu-dev
if: matrix.encoding == 'ICU'
- name: build
run: .build/build
- name: unittest
Expand Down
16 changes: 14 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ set (CMAKE_INCLUDE_CURRENT_DIR ON)
find_package(ZLIB)
find_package(Iconv)

# ICU is an optional dependency, so find_package() is called without REQUIRED.
find_package(ICU COMPONENTS uc io)

# ICU_FOUND is recomputed here from the result variables: ICU counts as
# available only when both an include path and a library list are known.
if(ICU_INCLUDE_DIRS AND ICU_LIBRARIES)
  set(ICU_FOUND TRUE)
else()
  set(ICU_FOUND FALSE)
endif()

set (HEADERS
kaitai/kaitaistream.h
kaitai/kaitaistruct.h
Expand All @@ -17,11 +24,11 @@ set (SOURCES
kaitai/kaitaistream.cpp
)

set(STRING_ENCODING_TYPE "ICONV" CACHE STRING "Set the way strings have to be encoded (ICONV|WIN32API|NONE|...)")
set(STRING_ENCODING_TYPE "ICONV" CACHE STRING "Set the way strings have to be encoded (ICONV|WIN32API|ICU|NONE|...)")

set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

add_library (${PROJECT_NAME} SHARED ${HEADERS} ${SOURCES})
add_library(${PROJECT_NAME} SHARED ${HEADERS} ${SOURCES})
set_property(TARGET ${PROJECT_NAME} PROPERTY PUBLIC_HEADER ${HEADERS})

if (ZLIB_FOUND)
Expand All @@ -33,6 +40,11 @@ if(Iconv_FOUND)
target_link_libraries(${PROJECT_NAME} PRIVATE Iconv::Iconv)
endif()

# When ICU is available, expose its headers and libraries to the library
# target. Both are PRIVATE: they are applied to this target only and are not
# propagated to consumers.
if(ICU_FOUND)
  target_include_directories(${PROJECT_NAME}
    PRIVATE
      ${ICU_INCLUDE_DIRS}
  )
  target_link_libraries(${PROJECT_NAME}
    PRIVATE
      ${ICU_LIBRARIES}
  )
endif()

include(Common.cmake)

install(TARGETS ${PROJECT_NAME}
Expand Down
2 changes: 2 additions & 0 deletions Common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ if (STRING_ENCODING_TYPE STREQUAL "ICONV")
target_compile_definitions(${PROJECT_NAME} PRIVATE -DKS_STR_ENCODING_ICONV)
elseif (STRING_ENCODING_TYPE STREQUAL "WIN32API")
target_compile_definitions(${PROJECT_NAME} PRIVATE -DKS_STR_ENCODING_WIN32API)
elseif (STRING_ENCODING_TYPE STREQUAL "ICU")
target_compile_definitions(${PROJECT_NAME} PRIVATE -DKS_STR_ENCODING_ICU)
elseif (STRING_ENCODING_TYPE STREQUAL "NONE")
target_compile_definitions(${PROJECT_NAME} PRIVATE -DKS_STR_ENCODING_NONE)
else()
Expand Down
44 changes: 43 additions & 1 deletion kaitai/kaitaistream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -872,6 +872,48 @@ std::string kaitai::kstream::bytes_to_str(const std::string src, int codepage) {
return utf8;
}

#elif defined(KS_STR_ENCODING_ICU)
#include <unicode/ucnv.h>
#include <iostream>

// Converts `src`, a sequence of bytes encoded in `src_enc`, into a string in
// KS_STR_DEFAULT_ENCODING using ICU's one-shot ucnv_convert().
//
// Parameters:
//   src     - raw bytes to convert
//   src_enc - name of the source encoding, passed to ICU as the converter name
//
// Returns the converted string, sized exactly to the converted length.
//
// Throws:
//   unknown_encoding   - when ICU reports U_FILE_ACCESS_ERROR
//   bytes_to_str_error - on any other ICU failure; the message is the ICU
//                        error name as returned by u_errorName()
std::string kaitai::kstream::bytes_to_str(const std::string src, const char *src_enc) {
    // Start with a buffer length of double the source length.
    size_t init_dst_len = src.length() * 2;
    std::string dst(init_dst_len, ' ');

    UErrorCode err = U_ZERO_ERROR;
    int32_t dst_len = ucnv_convert(KS_STR_DEFAULT_ENCODING, src_enc, &dst[0], init_dst_len, src.c_str(), src.length(), &err);

    if (err == U_BUFFER_OVERFLOW_ERROR) {
        // The buffer was too small, but the returned length tells us exactly
        // how much space is needed, so grow the buffer to that size.
        dst.resize(dst_len, ' ');

        // Try again with the new buffer; the error code must be reset first.
        err = U_ZERO_ERROR;
        dst_len = ucnv_convert(KS_STR_DEFAULT_ENCODING, src_enc, &dst[0], dst_len, src.c_str(), src.length(), &err);
    }

    if (U_FAILURE(err)) {
        // Conversion failed
        if (err == U_FILE_ACCESS_ERROR) {
            throw unknown_encoding(src_enc);
        }
        throw bytes_to_str_error(u_errorName(err));
    }

    // Conversion succeeded (on the first or the second attempt): trim the
    // buffer so the result holds exactly the converted bytes.
    dst.resize(dst_len);

    return dst;
}
#else
#error Need to decide how to handle strings: please define one of: KS_STR_ENCODING_ICONV, KS_STR_ENCODING_WIN32API, KS_STR_ENCODING_NONE
#error Need to decide how to handle strings: please define one of: KS_STR_ENCODING_ICONV, KS_STR_ENCODING_WIN32API, KS_STR_ENCODING_ICU, KS_STR_ENCODING_NONE
#endif
12 changes: 11 additions & 1 deletion tests/unittest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ TEST(KaitaiStreamTest, bytes_to_str_big_dest)
{
// Prepare a string in IBM437 that is reasonably big, fill it with U+2248 ALMOST EQUAL TO character,
// which is just 1 byte 0xF7 in IBM437.
const int len = 10000000;
const int len = 10;
std::string src(len, '\xF7');

std::string res = kaitai::kstream::bytes_to_str(src, "IBM437");
Expand Down Expand Up @@ -274,6 +274,8 @@ TEST(KaitaiStreamTest, bytes_to_str_invalid_seq_euc_jp_too_short)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: EINVAL"));
#elif defined(KS_STR_ENCODING_WIN32API)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: MultiByteToWideChar"));
#elif defined(KS_STR_ENCODING_ICU)
EXPECT_EQ(e.what(), std::string("xxx"));
#else
#error Unknown KS_STR_ENCODING
#endif
Expand All @@ -291,6 +293,8 @@ TEST(KaitaiStreamTest, bytes_to_str_invalid_seq_gb2312_too_short)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: EINVAL"));
#elif defined(KS_STR_ENCODING_WIN32API)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: MultiByteToWideChar"));
#elif defined(KS_STR_ENCODING_ICU)
EXPECT_EQ(e.what(), std::string("xxx"));
#else
#error Unknown KS_STR_ENCODING
#endif
Expand All @@ -307,6 +311,8 @@ TEST(KaitaiStreamTest, bytes_to_str_invalid_seq_gb2312_two_bytes)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: EILSEQ"));
#elif defined(KS_STR_ENCODING_WIN32API)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: MultiByteToWideChar"));
#elif defined(KS_STR_ENCODING_ICU)
EXPECT_EQ(e.what(), std::string("xxx"));
#else
#error Unknown KS_STR_ENCODING
#endif
Expand All @@ -324,6 +330,8 @@ TEST(KaitaiStreamTest, bytes_to_str_invalid_seq_utf_16le_odd_bytes)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: EINVAL"));
#elif defined(KS_STR_ENCODING_WIN32API)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: incomplete"));
#elif defined(KS_STR_ENCODING_ICU)
EXPECT_EQ(e.what(), std::string("xxx"));
#else
#error Unknown KS_STR_ENCODING
#endif
Expand All @@ -342,6 +350,8 @@ TEST(KaitaiStreamTest, bytes_to_str_invalid_seq_utf_16le_incomplete_high_surroga
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: EINVAL"));
#elif defined(KS_STR_ENCODING_WIN32API)
EXPECT_EQ(e.what(), std::string("bytes_to_str error: illegal sequence: WideCharToMultiByte"));
#elif defined(KS_STR_ENCODING_ICU)
EXPECT_EQ(e.what(), std::string("xxx"));
#else
#error Unknown KS_STR_ENCODING
#endif
Expand Down