Skip to content

MINOR: add unit tests for chr function #16856

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 24, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions datafusion/functions/src/string/chr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
Some(integer) => {
if integer == 0 {
return exec_err!("null character not permitted.");
} else if integer < 0 {
return exec_err!("negative input not permitted.");
} else {
match core::char::from_u32(integer as u32) {
Some(c) => {
Expand Down Expand Up @@ -132,3 +134,123 @@ impl ScalarUDFImpl for ChrFunc {
self.doc()
}
}

#[cfg(test)]
mod tests {
    //! Unit tests for [`chr`]: valid code points, NULL propagation, every class
    //! of rejected input, and the empty-array edge case.

    use super::*;
    use arrow::array::{Array, Int64Array, StringArray};
    use datafusion_common::assert_contains;

    /// Runs [`chr`] over a single `Int64` column built from `values` and asserts
    /// that the call fails with an error whose message contains `fragment`.
    ///
    /// `#[track_caller]` makes a failing assertion report the line of the calling
    /// test case instead of a line inside this helper.
    #[track_caller]
    fn assert_chr_error(values: &[i64], fragment: &str) {
        let input = Arc::new(Int64Array::from(values.to_vec()));
        let result = chr(&[input]);
        assert!(result.is_err(), "chr({:?}) unexpectedly succeeded", values);
        assert_contains!(result.err().unwrap().to_string(), fragment);
    }

    #[test]
    fn test_chr_normal() {
        // (input code point, expected output). `None` stands for SQL NULL on both
        // sides: a NULL input row is expected to produce a NULL output row.
        let cases: &[(Option<i64>, Option<&str>)] = &[
            (Some(65), Some("A")),
            (Some(66), Some("B")),
            (Some(67), Some("C")),
            (Some(128640), Some("🚀")),
            (Some(8364), Some("€")),
            (Some(945), Some("α")),
            (None, None), // NULL
            (Some(32), Some(" ")),
            (Some(10), Some("\n")),
            (Some(9), Some("\t")),
            // 0x10FFFF, the largest Unicode code point
            (Some(0x10FFFF), Some("\u{10ffff}")),
        ];

        let codes: Vec<Option<i64>> = cases.iter().map(|(code, _)| *code).collect();
        let input = Arc::new(Int64Array::from(codes));
        let result = chr(&[input]).unwrap();
        let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();

        assert_eq!(string_array.len(), cases.len());
        for (i, (code, expected)) in cases.iter().enumerate() {
            // `value(i)` is arbitrary for a null slot, so check nullness explicitly
            // instead of comparing a null slot against "".
            let actual = if string_array.is_null(i) {
                None
            } else {
                Some(string_array.value(i))
            };
            assert_eq!(actual, *expected, "chr({:?}) at index {}", code, i);
        }
    }

    #[test]
    fn test_chr_error() {
        // chr(0) returns an error
        assert_chr_error(&[0], "null character not permitted");

        // invalid Unicode code points (too large)
        assert_chr_error(&[i64::MAX], "requested character too large for encoding");

        // invalid Unicode code points (too large) case 2: one past the maximum
        assert_chr_error(
            &[0x10FFFF + 1],
            "requested character too large for encoding",
        );

        // invalid Unicode code points (surrogate code point); these are reported
        // with the same "too large" message.
        // link: <https://learn.microsoft.com/en-us/globalization/encoding/unicode-standard#surrogate-pairs>
        assert_chr_error(
            &[0xD800 + 1],
            "requested character too large for encoding",
        );

        // negative input: this value would become 2 if cast to u32, so it must be
        // rejected before the cast rather than silently mapped to a character.
        assert_chr_error(&[i64::MIN + 2i64], "negative input not permitted");

        // negative input case 2
        assert_chr_error(&[-1], "negative input not permitted");

        // one error with valid values after: A, NULL_CHAR, B
        assert_chr_error(&[65, 0, 66], "null character not permitted");
    }

    #[test]
    fn test_chr_empty() {
        // An empty input array must produce an empty string array, not an error.
        let no_rows: Vec<i64> = Vec::new();
        let input = Arc::new(Int64Array::from(no_rows));
        let result = chr(&[input]).unwrap();
        let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(string_array.len(), 0);
    }
}