Skip to content

Commit c90713b

Browse files
andygrove and viirya authored
perf: Faster decimal precision overflow checks (#6419)
* add benchmark
* add optimization
* fix
* fix
* cargo fmt
* clippy
* Update arrow-data/src/decimal.rs

  Co-authored-by: Liang-Chi Hsieh <[email protected]>

* optimize to avoid allocating an idx variable
* revert change to public api
* fix error in rustdoc

---------

Co-authored-by: Liang-Chi Hsieh <[email protected]>
1 parent d727503 commit c90713b

File tree

7 files changed

+224
-42
lines changed

7 files changed

+224
-42
lines changed

arrow-array/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,7 @@ harness = false
7171
[[bench]]
7272
name = "fixed_size_list_array"
7373
harness = false
74+
75+
[[bench]]
76+
name = "decimal_overflow"
77+
harness = false
arrow-array/benches/decimal_overflow.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow_array::builder::{Decimal128Builder, Decimal256Builder};
19+
use arrow_buffer::i256;
20+
use criterion::*;
21+
22+
fn criterion_benchmark(c: &mut Criterion) {
23+
let len = 8192;
24+
let mut builder_128 = Decimal128Builder::with_capacity(len);
25+
let mut builder_256 = Decimal256Builder::with_capacity(len);
26+
for i in 0..len {
27+
if i % 10 == 0 {
28+
builder_128.append_value(i128::MAX);
29+
builder_256.append_value(i256::from_i128(i128::MAX));
30+
} else {
31+
builder_128.append_value(i as i128);
32+
builder_256.append_value(i256::from_i128(i as i128));
33+
}
34+
}
35+
let array_128 = builder_128.finish();
36+
let array_256 = builder_256.finish();
37+
38+
c.bench_function("validate_decimal_precision_128", |b| {
39+
b.iter(|| black_box(array_128.validate_decimal_precision(8)));
40+
});
41+
c.bench_function("null_if_overflow_precision_128", |b| {
42+
b.iter(|| black_box(array_128.null_if_overflow_precision(8)));
43+
});
44+
c.bench_function("validate_decimal_precision_256", |b| {
45+
b.iter(|| black_box(array_256.validate_decimal_precision(8)));
46+
});
47+
c.bench_function("null_if_overflow_precision_256", |b| {
48+
b.iter(|| black_box(array_256.null_if_overflow_precision(8)));
49+
});
50+
}
51+
52+
criterion_group!(benches, criterion_benchmark);
53+
criterion_main!(benches);

arrow-array/src/array/primitive_array.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1570,9 +1570,7 @@ impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> {
15701570
/// Validates the Decimal Array, if the value of slot is overflow for the specified precision, and
15711571
/// will be casted to Null
15721572
pub fn null_if_overflow_precision(&self, precision: u8) -> Self {
1573-
self.unary_opt::<_, T>(|v| {
1574-
(T::validate_decimal_precision(v, precision).is_ok()).then_some(v)
1575-
})
1573+
self.unary_opt::<_, T>(|v| T::is_valid_decimal_precision(v, precision).then_some(v))
15761574
}
15771575

15781576
/// Returns [`Self::value`] formatted as a string

arrow-array/src/types.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@ use crate::temporal_conversions::as_datetime_with_timezone;
2424
use crate::timezone::Tz;
2525
use crate::{ArrowNativeTypeOp, OffsetSizeTrait};
2626
use arrow_buffer::{i256, Buffer, OffsetBuffer};
27-
use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision};
27+
use arrow_data::decimal::{
28+
is_validate_decimal256_precision, is_validate_decimal_precision, validate_decimal256_precision,
29+
validate_decimal_precision,
30+
};
2831
use arrow_data::{validate_binary_view, validate_string_view};
2932
use arrow_schema::{
3033
ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
@@ -1194,6 +1197,9 @@ pub trait DecimalType:
11941197

11951198
/// Validates that `value` contains no more than `precision` decimal digits
11961199
fn validate_decimal_precision(value: Self::Native, precision: u8) -> Result<(), ArrowError>;
1200+
1201+
/// Determines whether `value` contains no more than `precision` decimal digits
1202+
fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool;
11971203
}
11981204

11991205
/// Validate that `precision` and `scale` are valid for `T`
@@ -1256,6 +1262,10 @@ impl DecimalType for Decimal128Type {
12561262
fn validate_decimal_precision(num: i128, precision: u8) -> Result<(), ArrowError> {
12571263
validate_decimal_precision(num, precision)
12581264
}
1265+
1266+
fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool {
1267+
is_validate_decimal_precision(value, precision)
1268+
}
12591269
}
12601270

12611271
impl ArrowPrimitiveType for Decimal128Type {
@@ -1286,6 +1296,10 @@ impl DecimalType for Decimal256Type {
12861296
fn validate_decimal_precision(num: i256, precision: u8) -> Result<(), ArrowError> {
12871297
validate_decimal256_precision(num, precision)
12881298
}
1299+
1300+
fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool {
1301+
is_validate_decimal256_precision(value, precision)
1302+
}
12891303
}
12901304

12911305
impl ArrowPrimitiveType for Decimal256Type {

arrow-cast/src/cast/decimal.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -336,11 +336,7 @@ where
336336
if cast_options.safe {
337337
let iter = from.iter().map(|v| {
338338
v.and_then(|v| parse_string_to_decimal_native::<T>(v, scale as usize).ok())
339-
.and_then(|v| {
340-
T::validate_decimal_precision(v, precision)
341-
.is_ok()
342-
.then_some(v)
343-
})
339+
.and_then(|v| T::is_valid_decimal_precision(v, precision).then_some(v))
344340
});
345341
// Benefit:
346342
// 20% performance improvement
@@ -430,7 +426,7 @@ where
430426
(mul * v.as_())
431427
.round()
432428
.to_i128()
433-
.filter(|v| Decimal128Type::validate_decimal_precision(*v, precision).is_ok())
429+
.filter(|v| Decimal128Type::is_valid_decimal_precision(*v, precision))
434430
})
435431
.with_precision_and_scale(precision, scale)
436432
.map(|a| Arc::new(a) as ArrayRef)
@@ -473,7 +469,7 @@ where
473469
array
474470
.unary_opt::<_, Decimal256Type>(|v| {
475471
i256::from_f64((v.as_() * mul).round())
476-
.filter(|v| Decimal256Type::validate_decimal_precision(*v, precision).is_ok())
472+
.filter(|v| Decimal256Type::is_valid_decimal_precision(*v, precision))
477473
})
478474
.with_precision_and_scale(precision, scale)
479475
.map(|a| Arc::new(a) as ArrayRef)

arrow-cast/src/cast/mod.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -327,9 +327,10 @@ where
327327
let array = if scale < 0 {
328328
match cast_options.safe {
329329
true => array.unary_opt::<_, D>(|v| {
330-
v.as_().div_checked(scale_factor).ok().and_then(|v| {
331-
(D::validate_decimal_precision(v, precision).is_ok()).then_some(v)
332-
})
330+
v.as_()
331+
.div_checked(scale_factor)
332+
.ok()
333+
.and_then(|v| (D::is_valid_decimal_precision(v, precision)).then_some(v))
333334
}),
334335
false => array.try_unary::<_, D, _>(|v| {
335336
v.as_()
@@ -340,9 +341,10 @@ where
340341
} else {
341342
match cast_options.safe {
342343
true => array.unary_opt::<_, D>(|v| {
343-
v.as_().mul_checked(scale_factor).ok().and_then(|v| {
344-
(D::validate_decimal_precision(v, precision).is_ok()).then_some(v)
345-
})
344+
v.as_()
345+
.mul_checked(scale_factor)
346+
.ok()
347+
.and_then(|v| (D::is_valid_decimal_precision(v, precision)).then_some(v))
346348
}),
347349
false => array.try_unary::<_, D, _>(|v| {
348350
v.as_()

0 commit comments

Comments
 (0)