Skip to content

Commit deca696

Browse files
committed
fix(iceberg-datafusion): handle timestamp predicates from DF
DataFusion sometimes passes dates as string literals, but can also pass timestamp ScalarValues, which need to be converted to predicates correctly in order to enable partition pruning.
1 parent 833739e commit deca696

File tree

4 files changed

+369
-5
lines changed

4 files changed

+369
-5
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/iceberg/src/spec/values.rs

Lines changed: 227 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,11 +1195,57 @@ impl Datum {
11951195
(PrimitiveLiteral::Long(val), _, PrimitiveType::Int) => {
11961196
Ok(Datum::i64_to_i32(*val))
11971197
}
1198-
(PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamp) => {
1199-
Ok(Datum::timestamp_micros(*val))
1200-
}
1201-
(PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamptz) => {
1202-
Ok(Datum::timestamptz_micros(*val))
1198+
(PrimitiveLiteral::Long(val), source_type, target_type) => {
1199+
match (source_type, target_type) {
1200+
(_, PrimitiveType::Long) => Ok(Datum::long(*val)),
1201+
(
1202+
PrimitiveType::Long
1203+
| PrimitiveType::Timestamp
1204+
| PrimitiveType::Timestamptz,
1205+
PrimitiveType::Timestamp,
1206+
) => Ok(Datum::timestamp_micros(*val)),
1207+
(
1208+
PrimitiveType::Long
1209+
| PrimitiveType::Timestamp
1210+
| PrimitiveType::Timestamptz,
1211+
PrimitiveType::Timestamptz,
1212+
) => Ok(Datum::timestamptz_micros(*val)),
1213+
(
1214+
PrimitiveType::Long
1215+
| PrimitiveType::TimestampNs
1216+
| PrimitiveType::TimestamptzNs,
1217+
PrimitiveType::TimestampNs,
1218+
) => Ok(Datum::timestamp_nanos(*val)),
1219+
(
1220+
PrimitiveType::Long
1221+
| PrimitiveType::TimestampNs
1222+
| PrimitiveType::TimestamptzNs,
1223+
PrimitiveType::TimestamptzNs,
1224+
) => Ok(Datum::timestamptz_nanos(*val)),
1225+
(
1226+
PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs,
1227+
PrimitiveType::Timestamp,
1228+
) => Ok(Datum::timestamp_micros(val / 1000)),
1229+
(
1230+
PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs,
1231+
PrimitiveType::Timestamptz,
1232+
) => Ok(Datum::timestamptz_micros(val / 1000)),
1233+
(
1234+
PrimitiveType::Timestamp | PrimitiveType::Timestamptz,
1235+
PrimitiveType::TimestampNs,
1236+
) => Ok(Datum::timestamp_nanos(val * 1000)),
1237+
(
1238+
PrimitiveType::Timestamp | PrimitiveType::Timestamptz,
1239+
PrimitiveType::TimestamptzNs,
1240+
) => Ok(Datum::timestamptz_nanos(val * 1000)),
1241+
_ => Err(Error::new(
1242+
ErrorKind::DataInvalid,
1243+
format!(
1244+
"Can't convert datum from {} type to {} type.",
1245+
self.r#type, target_primitive_type
1246+
),
1247+
)),
1248+
}
12031249
}
12041250
// Let's wait with nano's until this clears up: https://github.com/apache/iceberg/pull/11775
12051251
(PrimitiveLiteral::Int128(val), _, PrimitiveType::Long) => {
@@ -3942,4 +3988,180 @@ mod tests {
39423988

39433989
assert_eq!(double_sorted, double_expected);
39443990
}
3991+
3992+
#[test]
3993+
fn test_datum_timestamp_nanos_convert_to_timestamp_micros() {
3994+
let datum = Datum::timestamp_nanos(12345000);
3995+
3996+
let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap();
3997+
3998+
let expected = Datum::timestamp_micros(12345);
3999+
4000+
assert_eq!(result, expected);
4001+
}
4002+
4003+
#[test]
4004+
fn test_datum_timestamp_nanos_convert_to_timestamptz_micros() {
4005+
let datum = Datum::timestamp_nanos(12345000);
4006+
4007+
let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap();
4008+
4009+
let expected = Datum::timestamptz_micros(12345);
4010+
4011+
assert_eq!(result, expected);
4012+
}
4013+
4014+
#[test]
4015+
fn test_datum_timestamptz_nanos_convert_to_timestamp_micros() {
4016+
let datum = Datum::timestamptz_nanos(12345000);
4017+
4018+
let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap();
4019+
4020+
let expected = Datum::timestamp_micros(12345);
4021+
4022+
assert_eq!(result, expected);
4023+
}
4024+
4025+
#[test]
4026+
fn test_datum_timestamptz_nanos_convert_to_timestamptz_micros() {
4027+
let datum = Datum::timestamptz_nanos(12345000);
4028+
4029+
let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap();
4030+
4031+
let expected = Datum::timestamptz_micros(12345);
4032+
4033+
assert_eq!(result, expected);
4034+
}
4035+
4036+
#[test]
4037+
fn test_datum_timestamp_micros_convert_to_timestamp_nanos() {
4038+
let datum = Datum::timestamp_micros(12345);
4039+
4040+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4041+
4042+
let expected = Datum::timestamp_nanos(12345000);
4043+
4044+
assert_eq!(result, expected);
4045+
}
4046+
4047+
#[test]
4048+
fn test_datum_timestamp_micros_convert_to_timestamptz_nanos() {
4049+
let datum = Datum::timestamp_micros(12345);
4050+
4051+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4052+
4053+
let expected = Datum::timestamptz_nanos(12345000);
4054+
4055+
assert_eq!(result, expected);
4056+
}
4057+
4058+
#[test]
4059+
fn test_datum_timestamptz_micros_convert_to_timestamp_nanos() {
4060+
let datum = Datum::timestamptz_micros(12345);
4061+
4062+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4063+
4064+
let expected = Datum::timestamp_nanos(12345000);
4065+
4066+
assert_eq!(result, expected);
4067+
}
4068+
4069+
#[test]
4070+
fn test_datum_timestamptz_micros_convert_to_timestamptz_nanos() {
4071+
let datum = Datum::timestamptz_micros(12345);
4072+
4073+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4074+
4075+
let expected = Datum::timestamptz_nanos(12345000);
4076+
4077+
assert_eq!(result, expected);
4078+
}
4079+
4080+
#[test]
4081+
fn test_datum_timestamp_nanos_convert_to_timestamp_nanos() {
4082+
let datum = Datum::timestamp_nanos(12345);
4083+
4084+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4085+
4086+
let expected = Datum::timestamp_nanos(12345);
4087+
4088+
assert_eq!(result, expected);
4089+
}
4090+
4091+
#[test]
4092+
fn test_datum_timestamp_nanos_convert_to_timestamptz_nanos() {
4093+
let datum = Datum::timestamp_nanos(12345);
4094+
4095+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4096+
4097+
let expected = Datum::timestamptz_nanos(12345);
4098+
4099+
assert_eq!(result, expected);
4100+
}
4101+
4102+
#[test]
4103+
fn test_datum_timestamptz_nanos_convert_to_timestamp_nanos() {
4104+
let datum = Datum::timestamptz_nanos(12345);
4105+
4106+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4107+
4108+
let expected = Datum::timestamp_nanos(12345);
4109+
4110+
assert_eq!(result, expected);
4111+
}
4112+
4113+
#[test]
4114+
fn test_datum_timestamptz_nanos_convert_to_timestamptz_nanos() {
4115+
let datum = Datum::timestamptz_nanos(12345);
4116+
4117+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4118+
4119+
let expected = Datum::timestamptz_nanos(12345);
4120+
4121+
assert_eq!(result, expected);
4122+
}
4123+
4124+
#[test]
4125+
fn test_datum_long_convert_to_timestamp_nanos() {
4126+
let datum = Datum::long(12345);
4127+
4128+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4129+
4130+
let expected = Datum::timestamp_nanos(12345);
4131+
4132+
assert_eq!(result, expected);
4133+
}
4134+
4135+
#[test]
4136+
fn test_datum_long_convert_to_timestamptz_nanos() {
4137+
let datum = Datum::long(12345);
4138+
4139+
let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap();
4140+
4141+
let expected = Datum::timestamptz_nanos(12345);
4142+
4143+
assert_eq!(result, expected);
4144+
}
4145+
4146+
#[test]
4147+
fn test_datum_timestamp_nanos_to_micros() {
4148+
let datum = Datum::timestamp_nanos(12345678);
4149+
4150+
let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap();
4151+
4152+
let expected = Datum::timestamp_micros(12345);
4153+
4154+
assert_eq!(result, expected);
4155+
}
4156+
4157+
#[test]
4158+
fn test_datum_timestamp_micros_to_nanos() {
4159+
let datum = Datum::timestamp_micros(12345);
4160+
4161+
let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap();
4162+
4163+
let expected = Datum::timestamp_nanos(12345000);
4164+
4165+
assert_eq!(result, expected);
4166+
}
39454167
}

crates/integrations/datafusion/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ repository = { workspace = true }
3131
[dependencies]
3232
anyhow = { workspace = true }
3333
async-trait = { workspace = true }
34+
chrono = { workspace = true }
3435
datafusion = { workspace = true }
3536
futures = { workspace = true }
3637
iceberg = { workspace = true }

0 commit comments

Comments
 (0)