Skip to content

Commit 27a78b4

Browse files
waynexia authored and adriangb committed
Chore: add unit tests for chr function (apache#16856)
* MINOR: add unit tests for chr function
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* handle negative input
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* fix clippy
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* add more test cases
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
* add a surrogate codepoint case
  Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
---------
Signed-off-by: Ruihang Xia <waynestxia@gmail.com>
1 parent 2a379e1 commit 27a78b4

File tree

1 file changed

+122
-0
lines changed
  • datafusion/functions/src/string

1 file changed

+122
-0
lines changed

datafusion/functions/src/string/chr.rs

Lines changed: 122 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
4949
Some(integer) => {
5050
if integer == 0 {
5151
return exec_err!("null character not permitted.");
52+
} else if integer < 0 {
53+
return exec_err!("negative input not permitted.");
5254
} else {
5355
match core::char::from_u32(integer as u32) {
5456
Some(c) => {
@@ -132,3 +134,123 @@ impl ScalarUDFImpl for ChrFunc {
132134
self.doc()
133135
}
134136
}
137+
138+
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{Array, Int64Array, StringArray};
    use datafusion_common::assert_contains;

    /// Calls `chr` on a single Int64 column built from `values` and asserts
    /// that the call returns an error whose message contains `expected_msg`.
    ///
    /// Shared by every case in `test_chr_error` so that each case is a single
    /// line stating the input and the expected message.
    fn assert_chr_error(values: Vec<i64>, expected_msg: &str) {
        let input = Arc::new(Int64Array::from(values));
        let result = chr(&[input]);
        assert!(
            result.is_err(),
            "expected chr to fail with an error containing {:?}",
            expected_msg
        );
        assert_contains!(result.err().unwrap().to_string(), expected_msg);
    }

    /// Valid code points (ASCII, multi-byte, and the largest code point) are
    /// converted to the corresponding one-character strings.
    #[test]
    fn test_chr_normal() {
        let input = Arc::new(Int64Array::from(vec![
            Some(65),       // A
            Some(66),       // B
            Some(67),       // C
            Some(128640),   // 🚀
            Some(8364),     // €
            Some(945),      // α
            None,           // NULL
            Some(32),       // space
            Some(10),       // newline
            Some(9),        // tab
            Some(0x10FFFF), // 0x10FFFF, the largest Unicode code point
        ]));
        let result = chr(&[input]).unwrap();
        let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
        // NOTE(review): the NULL input is compared against "" through `value(i)`,
        // which on its own does not distinguish a null slot from an empty
        // string. Consider also asserting `is_null(6)` once `chr`'s NULL
        // handling has been confirmed.
        let expected = [
            "A",
            "B",
            "C",
            "🚀",
            "€",
            "α",
            "",
            " ",
            "\n",
            "\t",
            "\u{10ffff}",
        ];

        // Derive the length from `expected` so the two cannot drift apart when
        // a case is added or removed.
        assert_eq!(string_array.len(), expected.len());
        for (i, e) in expected.iter().enumerate() {
            assert_eq!(string_array.value(i), *e, "mismatch at index {}", i);
        }
    }

    /// Inputs that `chr` rejects, each paired with the error message it must
    /// report.
    #[test]
    fn test_chr_error() {
        // chr(0) returns an error
        assert_chr_error(vec![0], "null character not permitted");

        // invalid Unicode code points (too large)
        assert_chr_error(
            vec![i64::MAX],
            "requested character too large for encoding",
        );

        // invalid Unicode code points (too large) case 2
        assert_chr_error(
            vec![0x10FFFF + 1],
            "requested character too large for encoding",
        );

        // invalid Unicode code points (surrogate code point)
        // link: <https://learn.microsoft.com/en-us/globalization/encoding/unicode-standard#surrogate-pairs>
        assert_chr_error(
            vec![0xD800 + 1],
            "requested character too large for encoding",
        );

        // negative input; this value will be 2 if cast to u32, so it must be
        // rejected before the cast
        assert_chr_error(vec![i64::MIN + 2i64], "negative input not permitted");

        // negative input case 2
        assert_chr_error(vec![-1], "negative input not permitted");

        // one error with valid values after: A, NULL_CHAR, B
        assert_chr_error(vec![65, 0, 66], "null character not permitted");
    }

    /// An empty input array yields an empty string array.
    #[test]
    fn test_chr_empty() {
        // empty input array
        let input = Arc::new(Int64Array::from(Vec::<i64>::new()));
        let result = chr(&[input]).unwrap();
        let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(string_array.len(), 0);
    }
}

0 commit comments

Comments
 (0)