@@ -28,7 +28,7 @@ use arrow::array::types::{
28
28
ArrowTimestampType , TimestampMicrosecondType , TimestampMillisecondType ,
29
29
TimestampNanosecondType , TimestampSecondType ,
30
30
} ;
31
- use arrow:: array:: { Array , PrimitiveArray } ;
31
+ use arrow:: array:: { Array , ArrayRef , Int64Array , PrimitiveArray } ;
32
32
use arrow:: datatypes:: DataType :: { self , Null , Timestamp , Utf8 , Utf8View } ;
33
33
use arrow:: datatypes:: TimeUnit :: { self , Microsecond , Millisecond , Nanosecond , Second } ;
34
34
use datafusion_common:: cast:: as_primitive_array;
@@ -60,6 +60,8 @@ use chrono::{
60
60
- hour / HOUR
61
61
- minute / MINUTE
62
62
- second / SECOND
63
+ - millisecond / MILLISECOND
64
+ - microsecond / MICROSECOND
63
65
"#
64
66
) ,
65
67
argument(
@@ -185,6 +187,26 @@ impl ScalarUDFImpl for DateTruncFunc {
185
187
) -> Result < ColumnarValue > {
186
188
let parsed_tz = parse_tz ( tz_opt) ?;
187
189
let array = as_primitive_array :: < T > ( array) ?;
190
+
191
+ // fast path for fine granularities
192
+ if matches ! (
193
+ granularity. as_str( ) ,
194
+ // For modern timezones, it's correct to truncate "minute" in this way.
195
+ // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
196
+ // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
197
+ "second" | "minute" | "millisecond" | "microsecond"
198
+ ) ||
199
+ // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
200
+ ( parsed_tz. is_none ( ) && matches ! ( granularity. as_str( ) , "hour" | "day" ) )
201
+ {
202
+ let result = general_date_trunc_array_fine_granularity (
203
+ T :: UNIT ,
204
+ array,
205
+ granularity. as_str ( ) ,
206
+ ) ?;
207
+ return Ok ( ColumnarValue :: Array ( result) ) ;
208
+ }
209
+
188
210
let array: PrimitiveArray < T > = array
189
211
. try_unary ( |x| {
190
212
general_date_trunc ( T :: UNIT , x, parsed_tz, granularity. as_str ( ) )
@@ -423,6 +445,55 @@ fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i6
423
445
Ok ( value. unwrap ( ) )
424
446
}
425
447
448
+ /// Fast path for fine granularities (hour and smaller) that can be handled
449
+ /// with simple arithmetic operations without calendar complexity.
450
+ ///
451
+ /// This function is timezone-agnostic and should only be used when:
452
+ /// - No timezone is specified in the input, OR
453
+ /// - The granularity is less than hour as hour can be affected by DST transitions in some cases
454
+ fn general_date_trunc_array_fine_granularity < T : ArrowTimestampType > (
455
+ tu : TimeUnit ,
456
+ array : & PrimitiveArray < T > ,
457
+ granularity : & str ,
458
+ ) -> Result < ArrayRef > {
459
+ let unit = match ( tu, granularity) {
460
+ ( Second , "minute" ) => Some ( Int64Array :: new_scalar ( 60 ) ) ,
461
+ ( Second , "hour" ) => Some ( Int64Array :: new_scalar ( 3600 ) ) ,
462
+ ( Second , "day" ) => Some ( Int64Array :: new_scalar ( 86400 ) ) ,
463
+
464
+ ( Millisecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
465
+ ( Millisecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000 ) ) ,
466
+ ( Millisecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000 ) ) ,
467
+ ( Millisecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000 ) ) ,
468
+
469
+ ( Microsecond , "millisecond" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
470
+ ( Microsecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000_000 ) ) ,
471
+ ( Microsecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000_000 ) ) ,
472
+ ( Microsecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000_000 ) ) ,
473
+ ( Microsecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000_000 ) ) ,
474
+
475
+ ( Nanosecond , "microsecond" ) => Some ( Int64Array :: new_scalar ( 1_000 ) ) ,
476
+ ( Nanosecond , "millisecond" ) => Some ( Int64Array :: new_scalar ( 1_000_000 ) ) ,
477
+ ( Nanosecond , "second" ) => Some ( Int64Array :: new_scalar ( 1_000_000_000 ) ) ,
478
+ ( Nanosecond , "minute" ) => Some ( Int64Array :: new_scalar ( 60_000_000_000 ) ) ,
479
+ ( Nanosecond , "hour" ) => Some ( Int64Array :: new_scalar ( 3_600_000_000_000 ) ) ,
480
+ ( Nanosecond , "day" ) => Some ( Int64Array :: new_scalar ( 86_400_000_000_000 ) ) ,
481
+ _ => None ,
482
+ } ;
483
+
484
+ if let Some ( unit) = unit {
485
+ let original_type = array. data_type ( ) ;
486
+ let array = arrow:: compute:: cast ( array, & DataType :: Int64 ) ?;
487
+ let array = arrow:: compute:: kernels:: numeric:: div ( & array, & unit) ?;
488
+ let array = arrow:: compute:: kernels:: numeric:: mul ( & array, & unit) ?;
489
+ let array = arrow:: compute:: cast ( & array, original_type) ?;
490
+ Ok ( array)
491
+ } else {
492
+ // truncate to the same or smaller unit
493
+ Ok ( Arc :: new ( array. clone ( ) ) )
494
+ }
495
+ }
496
+
426
497
// truncates a single value with the given timeunit to the specified granularity
427
498
fn general_date_trunc (
428
499
tu : TimeUnit ,
@@ -884,6 +955,21 @@ mod tests {
884
955
"2018-11-04T02:00:00-02" ,
885
956
] ,
886
957
) ,
958
+ (
959
+ vec![
960
+ "2024-10-26T23:30:00Z" ,
961
+ "2024-10-27T00:30:00Z" ,
962
+ "2024-10-27T01:30:00Z" ,
963
+ "2024-10-27T02:30:00Z" ,
964
+ ] ,
965
+ Some ( "Asia/Kathmandu" . into( ) ) , // UTC+5:45
966
+ vec![
967
+ "2024-10-27T05:00:00+05:45" ,
968
+ "2024-10-27T06:00:00+05:45" ,
969
+ "2024-10-27T07:00:00+05:45" ,
970
+ "2024-10-27T08:00:00+05:45" ,
971
+ ] ,
972
+ ) ,
887
973
] ;
888
974
889
975
cases. iter ( ) . for_each ( |( original, tz_opt, expected) | {
0 commit comments