@@ -49,6 +49,8 @@ pub fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
49
49
Some ( integer) => {
50
50
if integer == 0 {
51
51
return exec_err ! ( "null character not permitted." ) ;
52
+ } else if integer < 0 {
53
+ return exec_err ! ( "negative input not permitted." ) ;
52
54
} else {
53
55
match core:: char:: from_u32 ( integer as u32 ) {
54
56
Some ( c) => {
@@ -132,3 +134,123 @@ impl ScalarUDFImpl for ChrFunc {
132
134
self . doc ( )
133
135
}
134
136
}
137
+
138
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{Array, Int64Array, StringArray};
    use datafusion_common::assert_contains;

    /// Calls `chr` on a single `Int64Array` column built from `values` and
    /// asserts that the call fails with an error message containing
    /// `expected_msg`.
    fn assert_chr_error(values: &[i64], expected_msg: &str) {
        let input = Arc::new(Int64Array::from(values.to_vec()));
        let err = match chr(&[input]) {
            Ok(_) => panic!("chr({values:?}) should fail with {expected_msg:?}"),
            Err(e) => e,
        };
        assert_contains!(err.to_string(), expected_msg);
    }

    /// Valid code points map to their one-character strings; a NULL input
    /// slot reads back as an empty value.
    #[test]
    fn test_chr_normal() {
        let input = Arc::new(Int64Array::from(vec![
            Some(65),       // A
            Some(66),       // B
            Some(67),       // C
            Some(128640),   // 🚀
            Some(8364),     // €
            Some(945),      // α
            None,           // NULL
            Some(32),       // space
            Some(10),       // newline
            Some(9),        // tab
            Some(0x10FFFF), // the largest Unicode code point
        ]));
        let result = chr(&[input]).unwrap();
        let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
        let expected = [
            "A",
            "B",
            "C",
            "🚀",
            "€",
            "α",
            "", // NULL input: the slot's value is expected to read back as ""
            " ",
            "\n",
            "\t",
            "\u{10ffff}",
        ];

        // Derive the length from the table so the two cannot drift apart.
        assert_eq!(string_array.len(), expected.len());
        for (i, e) in expected.iter().enumerate() {
            assert_eq!(string_array.value(i), *e, "unexpected value at index {i}");
        }
    }

    /// Every rejected input produces the matching error message.
    #[test]
    fn test_chr_error() {
        // chr(0) returns an error
        assert_chr_error(&[0], "null character not permitted");

        // invalid Unicode code points (too large)
        assert_chr_error(&[i64::MAX], "requested character too large for encoding");

        // invalid Unicode code points (too large) case 2:
        // one past the largest Unicode code point
        assert_chr_error(
            &[0x10FFFF + 1],
            "requested character too large for encoding",
        );

        // invalid Unicode code points (surrogate code point)
        // link: <https://learn.microsoft.com/en-us/globalization/encoding/unicode-standard#surrogate-pairs>
        assert_chr_error(&[0xD800 + 1], "requested character too large for encoding");

        // negative input: `i64::MIN + 2` would be 2 if cast to u32, so this
        // checks that the sign is rejected before any narrowing cast
        assert_chr_error(&[i64::MIN + 2i64], "negative input not permitted");

        // negative input case 2
        assert_chr_error(&[-1], "negative input not permitted");

        // one error with valid values before and after: A, NULL_CHAR, B
        assert_chr_error(&[65, 0, 66], "null character not permitted");
    }

    /// An empty input array yields an empty `StringArray`.
    #[test]
    fn test_chr_empty() {
        let input = Arc::new(Int64Array::from(Vec::<i64>::new()));
        let result = chr(&[input]).unwrap();
        let string_array = result.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(string_array.len(), 0);
    }
}
0 commit comments