From e39aa7b7bb9c52ad7d248d6cf74f77f695f56de5 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Wed, 26 Oct 2016 11:06:37 -0700 Subject: [PATCH 1/2] PARQUET-675: Add INTERVAL_YEAR_MONTH and INTERVAL_DAY_TIME types --- src/main/thrift/parquet.thrift | 35 +++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift index ac4d50eb4..678dc7e15 100644 --- a/src/main/thrift/parquet.thrift +++ b/src/main/thrift/parquet.thrift @@ -162,19 +162,48 @@ enum ConvertedType { BSON = 20; /** + * @Deprecated: use INTERVAL_YEAR_MONTH or INTERVAL_DAY_TIME + * since the SQL standard defines either YEAR_MONTH or DAY_TIME unit. + * This is deprecated in favor of those 2 types + * * An interval of time - * + * * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 * This data is composed of three separate little endian unsigned * integers. Each stores a component of a duration of time. The first * integer identifies the number of months associated with the duration, * the second identifies the number of days associated with the duration - * and the third identifies the number of milliseconds associated with + * and the third identifies the number of milliseconds associated with * the provided duration. This duration of time is independent of any * particular timezone or date. */ INTERVAL = 21; - + + /** + * An interval of time with a year-month unit. + * + * This type annotates data stored as an INT32 + * This data is stored as a little endian unsigned + * integer identifying the number of months associated with the duration. + * This duration of time is independent of any + * particular timezone or date. + */ + INTERVAL_YEAR_MONTH = 22; + + /** + * An interval of time with days-milliseconds unit + * + * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 8 + * This data is composed of two separate little endian unsigned + * integers. 
Each stores a component of a duration of time. + * + * The first identifies the number of days associated with the duration + * and the second identifies the number of milliseconds associated with + * the provided duration. This duration of time is independent of any + * particular timezone or date. + */ + INTERVAL_DAY_TIME = 23; + } /** From ddc9c475e9009d8b1e9cfb13e957c6087e48b6a3 Mon Sep 17 00:00:00 2001 From: Julien Le Dem Date: Thu, 27 Oct 2016 19:08:16 -0700 Subject: [PATCH 2/2] add LogicalTypes description --- LogicalTypes.md | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/LogicalTypes.md b/LogicalTypes.md index 8c73f693d..d016f751b 100644 --- a/LogicalTypes.md +++ b/LogicalTypes.md @@ -130,8 +130,12 @@ milliseconds from the Unix epoch, 00:00:00.000 on 1 January 1970, UTC. microsecond precision. It must annotate an `int64` that stores the number of microseconds from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC. -### INTERVAL +### INTERVAL types +#### INTERVAL +`INTERVAL` is deprecated. Please use the more precise `INTERVAL_YEAR_MONTH` and +`INTERVAL_DAY_TIME` per the SQL spec. +For historical reference: `INTERVAL` is used for an interval of time. It must annotate a `fixed_len_byte_array` of length 12. This array stores three little-endian unsigned integers that represent durations at different granularities of time. @@ -144,6 +148,25 @@ example, there is no requirement that a large number of days should be expressed as a mix of months and days because there is not a constant conversion from days to months. +#### INTERVAL_YEAR_MONTH +`INTERVAL_YEAR_MONTH` describes an interval of time with a year-month unit. +It annotates data stored as an INT32. +This data is stored as a little-endian unsigned +integer identifying the number of months associated with the duration. +This duration of time is independent of any +particular timezone or date. 
+ +#### INTERVAL_DAY_TIME +`INTERVAL_DAY_TIME` describes an interval of time with a days-milliseconds unit. +This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 8. +This data is composed of two separate little-endian unsigned +integers. Each stores a component of a duration of time. + +The first identifies the number of days associated with the duration +and the second identifies the number of milliseconds associated with +the provided duration. This duration of time is independent of any +particular timezone or date. + ## Embedded Types ### JSON