chrono
chrono copied to clipboard
Extra slow Utc to Local conversion
I have to read large Parquet files with 500M-1B records where one of the columns is a datetime. By convention, Parquet timestamps are stored as Int96 UTC timestamps. When reading a file, I have to convert them to local time to make some simple calculations. Reading the Int96 from the file, converting it to milliseconds, and creating a DateTime<Utc> are all fast; however, converting to the Local timezone is incredibly slow:
use std::time::Instant;
use chrono::{DateTime, Local, Utc};
/// Builds the benchmark input: 5,000,000 Unix timestamps in milliseconds.
///
/// The data is a run of 50 values spaced 60,000 ms apart, starting at
/// 1_716_757_200_000, repeated 100,000 times back to back.
fn create_timestamps() -> Vec<i64> {
    const FIRST_MILLIS: i64 = 1_716_757_200_000;
    const STEP_MILLIS: i64 = 60_000;
    const BLOCK_LEN: i64 = 50;
    const BLOCK_REPEATS: usize = 100_000;

    // One run of consecutive values; every repetition is identical.
    let block: Vec<i64> = (0..BLOCK_LEN)
        .map(|j| FIRST_MILLIS + STEP_MILLIS * j)
        .collect();

    let mut timestamps = Vec::with_capacity(5_000_000);
    for _ in 0..BLOCK_REPEATS {
        timestamps.extend_from_slice(&block);
    }
    timestamps
}
/// Converts Unix timestamps in milliseconds to `DateTime<Utc>` values and
/// appends them to `dates`.
///
/// `dates` is not cleared first; results are pushed after any existing items.
///
/// # Panics
///
/// Panics if `DateTime::<Utc>::from_timestamp_millis` returns `None` for any
/// input value.
fn convert_to_utc(timestamps: &[i64], dates: &mut Vec<DateTime<Utc>>) {
    dates.extend(timestamps.iter().map(|&timestamp| {
        DateTime::<Utc>::from_timestamp_millis(timestamp)
            .expect("millisecond timestamp must be representable as DateTime<Utc>")
    }));
}
/// Converts Unix timestamps in milliseconds to `DateTime<Local>` values and
/// appends them to `dates`.
///
/// Each value is first built as a `DateTime<Utc>` and then moved to the
/// `Local` timezone with `with_timezone`. `dates` is not cleared first;
/// results are pushed after any existing items.
///
/// # Panics
///
/// Panics if `DateTime::<Utc>::from_timestamp_millis` returns `None` for any
/// input value.
fn convert_to_local(timestamps: &[i64], dates: &mut Vec<DateTime<Local>>) {
    dates.extend(timestamps.iter().map(|&timestamp| {
        DateTime::<Utc>::from_timestamp_millis(timestamp)
            .expect("millisecond timestamp must be representable as DateTime<Utc>")
            // The UTC -> Local step is the part this benchmark reports as slow.
            .with_timezone(&Local)
    }));
}
/// Benchmark driver: times 100 passes of the UTC conversion and 100 passes of
/// the UTC-to-local conversion over the same timestamp set, printing the
/// elapsed wall-clock time of each phase.
fn main() {
    // chrono = "0.4.38"
    let timestamps = create_timestamps();

    // Output buffers are allocated once and reused; `clear` keeps capacity.
    let mut utc_dates = Vec::with_capacity(timestamps.len());
    let start_time = Instant::now();
    // emulate reading 500M rows
    for _ in 0..100 {
        convert_to_utc(&timestamps, &mut utc_dates);
        utc_dates.clear();
    }
    let elapsed_time = start_time.elapsed();
    println!("convert to utc: {:?}", elapsed_time);

    let mut local_dates = Vec::with_capacity(timestamps.len());
    let start_time = Instant::now();
    // emulate reading 500M rows
    for _ in 0..100 {
        convert_to_local(&timestamps, &mut local_dates);
        local_dates.clear();
    }
    let elapsed_time = start_time.elapsed();
    println!("convert to local: {:?}", elapsed_time);
}
> cargo --version
cargo 1.78.0 (54d8815d0 2024-03-26)
> rustc --version
rustc 1.78.0 (9b00956e5 2024-04-29)
> cargo run --release
Finished `release` profile [optimized] target(s) in 0.01s
Running `target\release\chrono-tz-convert.exe`
convert to utc: 5.517348s
convert to local: **119.8019992s**
Here is a C# version for comparison (note that it is about 6 times faster):
namespace DateTimeBenchMark
{
    /// <summary>
    /// Benchmark comparing construction of UTC <c>DateTime</c> values from Unix
    /// millisecond timestamps with the same construction followed by
    /// <c>ToLocalTime()</c>.
    /// </summary>
    internal class Program
    {
        /// <summary>
        /// Runs 100 passes of each conversion over 5,000,000 timestamps and
        /// prints the elapsed time of each phase.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            // 100,000 repetitions of 50 timestamps spaced 60,000 ms apart.
            var timestamps = new List<long>(5_000_000);
            for (var i = 0; i < 100_000; i++)
            {
                for (var j = 0; j < 50; j++)
                {
                    timestamps.Add(1_716_757_200_000 + 60_000 * j);
                }
            }

            var utc_datetimes = new List<DateTime>(5_000_000);
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            for (var i = 0; i < 100; i++)
            {
                foreach (var timestamp in timestamps)
                {
                    // The values are milliseconds since the Unix epoch, so
                    // they must be added as milliseconds, not microseconds.
                    utc_datetimes.Add(DateTime.UnixEpoch.AddMilliseconds(timestamp));
                }
                // Reuse the buffer each pass, as the local-time loop does,
                // instead of letting the list grow to 500M entries.
                utc_datetimes.Clear();
            }
            stopwatch.Stop();
            Console.WriteLine($"convert to utc: {stopwatch.Elapsed}");

            var local_datetimes = new List<DateTime>(5_000_000);
            stopwatch.Restart();
            for (var i = 0; i < 100; i++)
            {
                foreach (var timestamp in timestamps)
                {
                    local_datetimes.Add(DateTime.UnixEpoch.AddMilliseconds(timestamp).ToLocalTime());
                }
                local_datetimes.Clear();
            }
            stopwatch.Stop();
            Console.WriteLine($"convert to local: {stopwatch.Elapsed}");
        }
    }
}
convert to utc: 00:00:03.2969532
convert to local: **00:00:19.5144110**