diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs index 4ee5995f0a6b..c14a92ffdd75 100644 --- a/datafusion/functions/src/unicode/character_length.rs +++ b/datafusion/functions/src/unicode/character_length.rs @@ -136,56 +136,37 @@ where // string is ASCII only is relatively cheap. // If strings are ASCII only, count bytes instead. let is_array_ascii_only = array.is_ascii(); - let array = if array.null_count() == 0 { + let nulls = array.nulls().cloned(); + let array = { if is_array_ascii_only { let values: Vec<_> = (0..array.len()) .map(|i| { - let value = array.value(i); + // Safety: we are iterating with array.len() so the index is always valid + let value = unsafe { array.value_unchecked(i) }; T::Native::usize_as(value.len()) }) .collect(); - PrimitiveArray::::new(values.into(), None) + PrimitiveArray::::new(values.into(), nulls) } else { let values: Vec<_> = (0..array.len()) .map(|i| { - let value = array.value(i); - if value.is_ascii() { - T::Native::usize_as(value.len()) + // Safety: we are iterating with array.len() so the index is always valid + if array.is_null(i) { + T::default_value() } else { - T::Native::usize_as(value.chars().count()) + let value = unsafe { array.value_unchecked(i) }; + if value.is_empty() { + T::default_value() + } else if value.is_ascii() { + T::Native::usize_as(value.len()) + } else { + T::Native::usize_as(value.chars().count()) + } } }) .collect(); - PrimitiveArray::::new(values.into(), None) + PrimitiveArray::::new(values.into(), nulls) } - } else if is_array_ascii_only { - let values: Vec<_> = (0..array.len()) - .map(|i| { - if array.is_null(i) { - T::default_value() - } else { - let value = array.value(i); - T::Native::usize_as(value.len()) - } - }) - .collect(); - PrimitiveArray::::new(values.into(), array.nulls().cloned()) - } else { - let values: Vec<_> = (0..array.len()) - .map(|i| { - if array.is_null(i) { - T::default_value() - } else { - let value = array.value(i); - if value.is_ascii() { - T::Native::usize_as(value.len()) - } else { - T::Native::usize_as(value.chars().count()) - } - } - }) - .collect(); - PrimitiveArray::::new(values.into(), array.nulls().cloned()) }; Ok(Arc::new(array))