Skip to content

Commit 943a08f

Browse files
committed
Allow base64 encoding of FixedSizeBinary arrays
1 parent 769f367 commit 943a08f

File tree

1 file changed

+48
-1
lines changed
  • datafusion/functions/src/encoding

1 file changed

+48
-1
lines changed

datafusion/functions/src/encoding/inner.rs

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ use base64::{
2929
Engine as _,
3030
};
3131
use datafusion_common::{
32-
cast::{as_generic_binary_array, as_generic_string_array},
32+
cast::{
33+
as_fixed_size_binary_array, as_generic_binary_array, as_generic_string_array,
34+
},
3335
not_impl_err, plan_err,
3436
utils::take_function_args,
3537
};
@@ -105,6 +107,7 @@ impl ScalarUDFImpl for EncodeFunc {
105107
Utf8View => Utf8,
106108
Binary => Utf8,
107109
LargeBinary => LargeUtf8,
110+
FixedSizeBinary(_) => Utf8,
108111
Null => Null,
109112
_ => {
110113
return plan_err!(
@@ -135,6 +138,9 @@ impl ScalarUDFImpl for EncodeFunc {
135138
DataType::LargeUtf8 => Ok(vec![DataType::LargeUtf8, DataType::Utf8]),
136139
DataType::Binary => Ok(vec![DataType::Binary, DataType::Utf8]),
137140
DataType::LargeBinary => Ok(vec![DataType::LargeBinary, DataType::Utf8]),
141+
DataType::FixedSizeBinary(sz) => {
142+
Ok(vec![DataType::FixedSizeBinary(*sz), DataType::Utf8])
143+
}
138144
_ => plan_err!(
139145
"1st argument should be Utf8 or Binary or Null, got {:?}",
140146
arg_types[0]
@@ -246,6 +252,9 @@ fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result<ColumnarV
246252
DataType::Utf8View => encoding.encode_utf8_array::<i32>(a.as_ref()),
247253
DataType::Binary => encoding.encode_binary_array::<i32>(a.as_ref()),
248254
DataType::LargeBinary => encoding.encode_binary_array::<i64>(a.as_ref()),
255+
DataType::FixedSizeBinary(_) => {
256+
encoding.encode_fixed_size_binary_array(a.as_ref())
257+
}
249258
other => exec_err!(
250259
"Unsupported data type {other:?} for function encode({encoding})"
251260
),
@@ -265,6 +274,9 @@ fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result<ColumnarV
265274
),
266275
ScalarValue::LargeBinary(a) => Ok(encoding
267276
.encode_large_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
277+
ScalarValue::FixedSizeBinary(_, a) => Ok(
278+
encoding.encode_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))
279+
),
268280
other => exec_err!(
269281
"Unsupported data type {other:?} for function encode({encoding})"
270282
),
@@ -401,6 +413,15 @@ impl Encoding {
401413
Ok(ColumnarValue::Array(array))
402414
}
403415

416+
/// Encodes a fixed-size binary array with this encoding.
///
/// Downcasts `value` with `as_fixed_size_binary_array` (propagating its error
/// if the downcast fails), applies `base64_encode` or `hex_encode` through
/// `encode_to_array!` depending on `self`, and wraps the resulting array in
/// `ColumnarValue::Array`.
fn encode_fixed_size_binary_array(self, value: &dyn Array) -> Result<ColumnarValue> {
417+
let input_value = as_fixed_size_binary_array(value)?;
418+
// Same per-variant dispatch for both supported encodings; only the element encoder differs.
let array: ArrayRef = match self {
419+
Self::Base64 => encode_to_array!(base64_encode, input_value),
420+
Self::Hex => encode_to_array!(hex_encode, input_value),
421+
};
422+
Ok(ColumnarValue::Array(array))
423+
}
424+
404425
fn encode_utf8_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
405426
where
406427
T: OffsetSizeTrait,
@@ -553,3 +574,29 @@ fn decode(args: &[ColumnarValue]) -> Result<ColumnarValue> {
553574
}?;
554575
decode_process(expression, encoding)
555576
}
577+
578+
// Unit tests for FixedSizeBinary support in `encode_process`.
#[cfg(test)]
579+
mod tests {
580+
// Verifies that a FixedSizeBinary array is accepted by `encode_process`
// with `Encoding::Base64` and yields a `StringArray`.
#[test]
581+
fn test_encode_fsb() {
582+
use super::*;
583+
584+
// One-element array of width 16 whose only value is sixteen zero bytes.
let value = vec![0u8; 16];
585+
let array = arrow::array::FixedSizeBinaryArray::try_from_sparse_iter_with_size(
586+
vec![Some(value)].into_iter(),
587+
16,
588+
)
589+
.unwrap();
590+
let value = ColumnarValue::Array(Arc::new(array));
591+
592+
// An array input must produce an array output; anything else fails the test.
let ColumnarValue::Array(result) =
593+
encode_process(&value, Encoding::Base64).unwrap()
594+
else {
595+
panic!("unexpected value");
596+
};
597+
598+
let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
599+
let result_value = string_array.value(0);
600+
// NOTE(review): the expected text has no trailing '=' padding, so `base64_encode`
// presumably uses a no-pad Base64 engine — confirm against its definition.
assert_eq!(result_value, "AAAAAAAAAAAAAAAAAAAAAA");
601+
}
602+
}

0 commit comments

Comments
 (0)