transit-lang-cmp
transit-lang-cmp copied to clipboard
Rust Small String Optimization
trafficstars
Since the vast majority of the strings in `Data` are reasonably small, they are a good target for small string optimization. I went with the `smartstring` crate since it seemed to be the most popular.
After this change, it looks like the vast majority of the time is taken up by JSON serialization. I tried a few other things, but none of them panned out. I think switching out the JSON serialization for something hand-written would make the biggest difference, but that seems close to gaming the benchmarks.
Performance
Baseline:
- Parsing stop times: 566 ms
- Heavy: 2,534 req/s
- Light: 15,679 req/s
With this patch:
- Parsing stop times: 447 ms
- Heavy: 2,695 req/s
- Light: 16,520 req/s
The Patch
diff --git a/trustit/Cargo.toml b/trustit/Cargo.toml
index bd33270..4c36b4d 100644
--- a/trustit/Cargo.toml
+++ b/trustit/Cargo.toml
@@ -10,6 +10,7 @@ axum = "0.6.0-rc.2"
csv = "1"
futures = "0.3"
serde = { version = "1.0", features = ["derive"] }
+smartstring = { version = "1.0", features = ["serde"] }
tokio = { version = "1", features = ["full"] }
[profile.release]
diff --git a/trustit/src/main.rs b/trustit/src/main.rs
index a55967f..9c2b77b 100644
--- a/trustit/src/main.rs
+++ b/trustit/src/main.rs
@@ -4,6 +4,8 @@ use axum::Json;
use axum::{extract::Path, extract::State, response::IntoResponse, routing::get, Router};
use csv;
use serde::Serialize;
+use smartstring::alias::CompactString;
+
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
@@ -13,16 +15,16 @@ use std::time::Instant;
// are never accessed
#[allow(dead_code)]
struct StopTime {
- trip_id: String,
- stop_id: String,
- arrival: String,
- departure: String,
+ trip_id: CompactString,
+ stop_id: CompactString,
+ arrival: CompactString,
+ departure: CompactString,
}
struct Trip {
- trip_id: String,
- route_id: String,
- service_id: String,
+ trip_id: CompactString,
+ route_id: CompactString,
+ service_id: CompactString,
}
#[derive(Debug, Serialize)]
@@ -42,9 +44,9 @@ struct ScheduleResponse<'data> {
struct Data {
trips: Vec<Trip>,
- trips_ix_by_route: HashMap<String, Vec<usize>>,
+ trips_ix_by_route: HashMap<CompactString, Vec<usize>>,
stop_times: Vec<StopTime>,
- stop_times_ix_by_trip: HashMap<String, Vec<usize>>,
+ stop_times_ix_by_trip: HashMap<CompactString, Vec<usize>>,
}
#[tokio::main]
@@ -74,7 +76,7 @@ async fn schedule_handler(
) -> axum::response::Response {
let resp: Vec<TripResponse> = data
.trips_ix_by_route
- .get(&route_id)
+ .get(&CompactString::from(route_id))
.unwrap_or(&Vec::new())
.iter()
.map(|trip_ix| {
@@ -104,7 +106,7 @@ async fn schedule_handler(
Json(resp).into_response()
}
-fn get_stop_times() -> (Vec<StopTime>, HashMap<String, Vec<usize>>) {
+fn get_stop_times() -> (Vec<StopTime>, HashMap<CompactString, Vec<usize>>) {
let now = Instant::now();
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
@@ -129,13 +131,13 @@ fn get_stop_times() -> (Vec<StopTime>, HashMap<String, Vec<usize>>) {
}
}
- let mut stop_time_by_trip: HashMap<String, Vec<usize>> = HashMap::new();
+ let mut stop_time_by_trip = HashMap::new();
let mut ix: usize = 0;
let mut stop_times: Vec<StopTime> = Vec::with_capacity(2_000_000);
for result in rdr_iter {
let record = result.expect("CSV record");
- let trip_id: String = record.get(0).expect("row trip").into();
+ let trip_id = CompactString::from(String::from(record.get(0).expect("row trip")));
let trips = stop_time_by_trip
.entry(trip_id.clone())
@@ -161,7 +163,7 @@ fn get_stop_times() -> (Vec<StopTime>, HashMap<String, Vec<usize>>) {
return (stop_times, stop_time_by_trip);
}
-fn get_trips() -> (Vec<Trip>, HashMap<String, Vec<usize>>) {
+fn get_trips() -> (Vec<Trip>, HashMap<CompactString, Vec<usize>>) {
let now = Instant::now();
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
@@ -186,12 +188,12 @@ fn get_trips() -> (Vec<Trip>, HashMap<String, Vec<usize>>) {
}
let mut trips: Vec<Trip> = Vec::with_capacity(2_000_000);
- let mut trip_by_route: HashMap<String, Vec<usize>> = HashMap::new();
+ let mut trip_by_route = HashMap::new();
let mut ix: usize = 0;
for result in rdr_iter {
let record = result.expect("CSV record");
- let route_id: String = record.get(0).expect("row route").into();
+ let route_id = CompactString::from(String::from(record.get(0).expect("row route")));
let e = trip_by_route.entry(route_id.clone()).or_insert(Vec::new());
e.push(ix);
trips.push(Trip {