datafusion icon indicating copy to clipboard operation
datafusion copied to clipboard

Decimal calculation overflows but does not throw an error

Open mmooyyii opened this issue 7 months ago • 1 comments

Describe the bug

  1. Generate the test CSV (Python)
import csv
import random
import decimal

# Seed the random module with a fixed value so that every run of this script
# draws the same sequence of values.
random.seed(42)

def make_big_random_decimal():
    """Return a random positive Decimal with a large integer part.

    Draws an integer numerator in [1, 2**53] and then a power-of-ten
    divisor 10**k with k in [1, 8], and returns numerator / divisor.
    The numerator is drawn before the exponent; keep that order so a
    seeded run produces the same values.
    """
    numerator = random.randint(1, 1 << 53)
    exponent = random.randint(1, 8)
    divisor = decimal.Decimal(10 ** exponent)
    return decimal.Decimal(numerator) / divisor


def make_small_random_decimal():
    """Return a random Decimal strictly between 0 and 1.

    Draws an integer in [1, 100] and uses its decimal digits verbatim as
    the fractional part, i.e. 5 -> Decimal("0.5"), 10 -> Decimal("0.10"),
    100 -> Decimal("0.100").
    """
    fraction_digits = str(random.randint(1, 100))
    return decimal.Decimal("0." + fraction_digits)


# Build the two input columns: 100000 values for d1 and 100000 values for d2.
# All d1 values are drawn before any d2 value, so swapping these two lines
# would change the generated data.
decimals_1 = [make_big_random_decimal() for _ in range(100000)]
decimals_2 = [make_small_random_decimal() for _ in range(100000)]

# Reference value of sum(d1 * d2), accumulated with Python's decimal module
# while the rows are written.
# NOTE(review): this uses the current decimal context; with the default
# context (28 significant digits) the running sum is rounded to that precision.
SUM = decimal.Decimal(0)

# newline='' is what the csv module documentation recommends for files handed
# to csv.writer, so the writer alone controls the line terminator.
with open("/tmp/decimal.csv", 'w', newline='') as f:
    writer = csv.writer(f)
    for d1, d2 in zip(decimals_1, decimals_2):
        writer.writerow([d1, d2])
        SUM += d1 * d2

print(SUM)  # prints 3318680488765741748.466457758
  2. Calculate sum(d1 * d2) in DataFusion (Rust)
use arrow_schema::{DataType, Field, Schema, SchemaBuilder};
use datafusion::error::Result;
use datafusion::prelude::*;

/// Loads `/tmp/decimal.csv` as table `tb` and prints `sum(d1 * d2)`.
///
/// Both columns are declared as non-nullable `Decimal128(38, 10)`, and the
/// CSV file is read without a header row.
///
/// # Errors
///
/// Propagates any error returned while registering the CSV file, planning
/// the query, or showing its result.
#[tokio::main]
async fn main() -> Result<()> {
    // Column layout of the headerless CSV file.
    let columns = vec![
        Field::new("d1", DataType::Decimal128(38, 10), false),
        Field::new("d2", DataType::Decimal128(38, 10), false),
    ];
    let table_schema = SchemaBuilder::from(Schema::new(columns).fields).finish();

    let csv_options = CsvReadOptions::new()
        .schema(&table_schema)
        .has_header(false)
        .file_extension(".csv");

    let session = SessionContext::new();
    session
        .register_csv("tb", "/tmp/decimal.csv", csv_options)
        .await?;

    // Run the aggregate and print the resulting table to stdout.
    let result_frame = session.sql("select sum(d1 * d2) from tb").await?;
    result_frame.show().await?;
    Ok(())
}

+-----------------------------------------+
| sum(tb.d1 * tb.d2)                      |
+-----------------------------------------+
| -84143180443642886.16728833341768211456 |
+-----------------------------------------+

To Reproduce

No response

Expected behavior

No response

Additional context

No response

mmooyyii avatar Jun 14 '25 00:06 mmooyyii

https://github.com/apache/datafusion/blob/ca0b760af6137c0dbec8b07daa5f48e262420cb5/datafusion/functions-aggregate/src/sum.rs#L309

Should this line use `*v = v.add_checked(x)?;` instead, so that an overflow returns an error?

mmooyyii avatar Jun 16 '25 05:06 mmooyyii