Skip to content

Commit fb96263

Browse files
MazterQyou authored and mcheshkov committed
fix: Trim ".0" postfix when converting Float to Utf8
1 parent 55763a1 commit fb96263

File tree

3 files changed

+64
-7
lines changed

3 files changed

+64
-7
lines changed

arrow/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ flatbuffers = { version = "=2.1.2", optional = true }
5555
hex = "0.4"
5656
comfy-table = { version = "5.0", optional = true, default-features = false }
5757
pyo3 = { version = "0.16", optional = true }
58-
lexical-core = "^0.8"
58+
lexical-core = { version = "^0.8", features = ["format"] }
5959
multiversion = "0.6.1"
6060
bitflags = "1.2.1"
6161

arrow/src/compute/kernels/cast.rs

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ use crate::datatypes::*;
4949
use crate::error::{ArrowError, Result};
5050
use crate::{array::*, compute::take};
5151
use crate::{
52-
buffer::Buffer, util::display::array_value_to_string,
53-
util::serialization::lexical_to_string,
52+
buffer::Buffer,
53+
util::display::array_value_to_string,
54+
util::serialization::{float_lexical_to_string, lexical_to_string},
5455
};
5556
use num::{NumCast, ToPrimitive};
5657

@@ -832,8 +833,8 @@ pub fn cast_with_options(
832833
Int16 => cast_numeric_to_string::<Int16Type, i32>(array),
833834
Int32 => cast_numeric_to_string::<Int32Type, i32>(array),
834835
Int64 => cast_numeric_to_string::<Int64Type, i32>(array),
835-
Float32 => cast_numeric_to_string::<Float32Type, i32>(array),
836-
Float64 => cast_numeric_to_string::<Float64Type, i32>(array),
836+
Float32 => cast_float_to_string::<Float32Type, i32>(array),
837+
Float64 => cast_float_to_string::<Float64Type, i32>(array),
837838
Timestamp(unit, _) => match unit {
838839
TimeUnit::Nanosecond => {
839840
cast_timestamp_to_string::<TimestampNanosecondType, i32>(array)
@@ -888,8 +889,8 @@ pub fn cast_with_options(
888889
Int16 => cast_numeric_to_string::<Int16Type, i64>(array),
889890
Int32 => cast_numeric_to_string::<Int32Type, i64>(array),
890891
Int64 => cast_numeric_to_string::<Int64Type, i64>(array),
891-
Float32 => cast_numeric_to_string::<Float32Type, i64>(array),
892-
Float64 => cast_numeric_to_string::<Float64Type, i64>(array),
892+
Float32 => cast_float_to_string::<Float32Type, i64>(array),
893+
Float64 => cast_float_to_string::<Float64Type, i64>(array),
893894
Timestamp(unit, _) => match unit {
894895
TimeUnit::Nanosecond => {
895896
cast_timestamp_to_string::<TimestampNanosecondType, i64>(array)
@@ -1592,6 +1593,36 @@ where
15921593
.collect()
15931594
}
15941595

1596+
/// Cast float types to Utf8
1597+
fn cast_float_to_string<FROM, OffsetSize>(array: &ArrayRef) -> Result<ArrayRef>
1598+
where
1599+
FROM: ArrowFloatNumericType,
1600+
FROM::Native:
1601+
lexical_core::ToLexicalWithOptions<Options = lexical_core::WriteFloatOptions>,
1602+
OffsetSize: StringOffsetSizeTrait,
1603+
{
1604+
Ok(Arc::new(float_to_string_cast::<FROM, OffsetSize>(
1605+
array
1606+
.as_any()
1607+
.downcast_ref::<PrimitiveArray<FROM>>()
1608+
.unwrap(),
1609+
)))
1610+
}
1611+
1612+
fn float_to_string_cast<T, OffsetSize>(
1613+
from: &PrimitiveArray<T>,
1614+
) -> GenericStringArray<OffsetSize>
1615+
where
1616+
T: ArrowPrimitiveType + ArrowFloatNumericType,
1617+
T::Native:
1618+
lexical_core::ToLexicalWithOptions<Options = lexical_core::WriteFloatOptions>,
1619+
OffsetSize: StringOffsetSizeTrait,
1620+
{
1621+
from.iter()
1622+
.map(|maybe_value| maybe_value.map(float_lexical_to_string))
1623+
.collect()
1624+
}
1625+
15951626
/// Cast Utf8 to numeric types
15961627
fn cast_string_to_numeric<T, Offset: StringOffsetSizeTrait>(
15971628
from: &ArrayRef,

arrow/src/util/serialization.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,29 @@ pub fn lexical_to_string<N: lexical_core::ToLexical>(n: N) -> String {
3131
String::from_utf8_unchecked(buf)
3232
}
3333
}
34+
35+
/// Converts float type to a `String`
36+
pub fn float_lexical_to_string<
37+
N: lexical_core::ToLexicalWithOptions<Options = lexical_core::WriteFloatOptions>,
38+
>(
39+
n: N,
40+
) -> String {
41+
let mut buf = Vec::<u8>::with_capacity(N::FORMATTED_SIZE_DECIMAL);
42+
unsafe {
43+
// JUSTIFICATION
44+
// Benefit
45+
// Allows using the faster serializer lexical core and convert to string
46+
// Soundness
47+
// Length of buf is set as written length afterwards. lexical_core
48+
// creates a valid string, so doesn't need to be checked.
49+
let slice = std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity());
50+
let options = lexical_core::WriteFloatOptions::builder()
51+
.trim_floats(true)
52+
.build()
53+
.unwrap();
54+
const FORMAT: u128 = lexical_core::format::POSTGRESQL;
55+
let len = lexical_core::write_with_options::<_, FORMAT>(n, slice, &options).len();
56+
buf.set_len(len);
57+
String::from_utf8_unchecked(buf)
58+
}
59+
}

0 commit comments

Comments
 (0)