Skip to content

Commit 8078e09

Browse files
authored
feat: cherry-pick UUID conversion logic from #2528. (#2648)
1 parent fa963bf commit 8078e09

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

native/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

native/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@ reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-
7777
object_store_opendal = {version = "0.54.0", optional = true}
7878
hdfs-sys = {version = "0.3", optional = true, features = ["hdfs_3_3"]}
7979
opendal = { version ="0.54.1", optional = true, features = ["services-hdfs"] }
80+
uuid = "1.0"
8081

8182
[target.'cfg(target_os = "linux")'.dependencies]
8283
procfs = "0.18.0"

native/core/src/parquet/parquet_support.rs

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
use crate::execution::operators::ExecutionError;
19-
use arrow::array::{ListArray, MapArray};
19+
use arrow::array::{FixedSizeBinaryArray, ListArray, MapArray, StringArray};
2020
use arrow::buffer::NullBuffer;
2121
use arrow::compute::can_cast_types;
2222
use arrow::datatypes::{FieldRef, Fields};
@@ -200,6 +200,28 @@ fn parquet_convert_array(
200200
(Map(_, ordered_from), Map(_, ordered_to)) if ordered_from == ordered_to =>
201201
parquet_convert_map_to_map(array.as_map(), to_type, parquet_options, *ordered_to)
202202
,
203+
// Iceberg stores UUIDs as 16-byte fixed-size binary values, but Spark expects a string representation.
204+
// Arrow doesn't support casting FixedSizeBinary to Utf8, so we handle it manually.
205+
(FixedSizeBinary(16), Utf8) => {
206+
let binary_array = array
207+
.as_any()
208+
.downcast_ref::<FixedSizeBinaryArray>()
209+
.expect("Expected a FixedSizeBinaryArray");
210+
211+
let string_array: StringArray = binary_array
212+
.iter()
213+
.map(|opt_bytes| {
214+
opt_bytes.map(|bytes| {
215+
let uuid = uuid::Uuid::from_bytes(
216+
bytes.try_into().expect("Expected 16 bytes")
217+
);
218+
uuid.to_string()
219+
})
220+
})
221+
.collect();
222+
223+
Ok(Arc::new(string_array))
224+
}
203225
// If Arrow cast supports the cast, delegate the cast to Arrow
204226
_ if can_cast_types(from_type, to_type) => {
205227
Ok(cast_with_options(&array, to_type, &PARQUET_OPTIONS)?)

0 commit comments

Comments
 (0)