|
16 | 16 | // under the License. |
17 | 17 |
|
18 | 18 | use crate::execution::operators::ExecutionError; |
19 | | -use arrow::array::{ListArray, MapArray}; |
| 19 | +use arrow::array::{FixedSizeBinaryArray, ListArray, MapArray, StringArray}; |
20 | 20 | use arrow::buffer::NullBuffer; |
21 | 21 | use arrow::compute::can_cast_types; |
22 | 22 | use arrow::datatypes::{FieldRef, Fields}; |
@@ -200,6 +200,28 @@ fn parquet_convert_array( |
200 | 200 | (Map(_, ordered_from), Map(_, ordered_to)) if ordered_from == ordered_to => |
201 | 201 | parquet_convert_map_to_map(array.as_map(), to_type, parquet_options, *ordered_to) |
202 | 202 | , |
| 203 | + // Iceberg stores UUIDs as 16-byte fixed-size binary, but Spark expects a string representation. |
| 204 | + // Arrow doesn't support casting FixedSizeBinary to Utf8, so we handle it manually. |
| 205 | + (FixedSizeBinary(16), Utf8) => { |
| 206 | + let binary_array = array |
| 207 | + .as_any() |
| 208 | + .downcast_ref::<FixedSizeBinaryArray>() |
| 209 | + .expect("Expected a FixedSizeBinaryArray"); |
| 210 | + |
| 211 | + let string_array: StringArray = binary_array |
| 212 | + .iter() |
| 213 | + .map(|opt_bytes| { |
| 214 | + opt_bytes.map(|bytes| { |
| 215 | + let uuid = uuid::Uuid::from_bytes( |
| 216 | + bytes.try_into().expect("Expected 16 bytes") |
| 217 | + ); |
| 218 | + uuid.to_string() |
| 219 | + }) |
| 220 | + }) |
| 221 | + .collect(); |
| 222 | + |
| 223 | + Ok(Arc::new(string_array)) |
| 224 | + } |
203 | 225 | // If Arrow cast supports the cast, delegate the cast to Arrow |
204 | 226 | _ if can_cast_types(from_type, to_type) => { |
205 | 227 | Ok(cast_with_options(&array, to_type, &PARQUET_OPTIONS)?) |
|
0 commit comments