transit-lang-cmp icon indicating copy to clipboard operation
transit-lang-cmp copied to clipboard

Rust Small String Optimization

Open CosmicHorrorDev opened this issue 3 years ago • 0 comments
trafficstars

Since the vast majority of the strings in Data are reasonably small, they are a good target for small string optimization. I went with the smartstring crate since it seemed to be the most popular.

After this change, it looks like the vast majority of the time is taken up by JSON serialization. I tried a few other things, but none of them panned out. I think switching out the JSON serialization for something hand-written would make the biggest difference, but that seems close to gaming the benchmarks.

Performance

Baseline:

  • Parsing stop times: 566 ms
  • Heavy: 2,534 req/s
  • Light: 15,679 req/s

With this patch:

  • Parsing stop times: 447 ms
  • Heavy: 2,695 req/s
  • Light: 16,520 req/s

The Patch

diff --git a/trustit/Cargo.toml b/trustit/Cargo.toml
index bd33270..4c36b4d 100644
--- a/trustit/Cargo.toml
+++ b/trustit/Cargo.toml
@@ -10,6 +10,7 @@ axum = "0.6.0-rc.2"
 csv = "1"
 futures = "0.3"
 serde = { version = "1.0", features = ["derive"] }
+smartstring = { version = "1.0", features = ["serde"] }
 tokio = { version = "1", features = ["full"] }
 
 [profile.release]
diff --git a/trustit/src/main.rs b/trustit/src/main.rs
index a55967f..9c2b77b 100644
--- a/trustit/src/main.rs
+++ b/trustit/src/main.rs
@@ -4,6 +4,8 @@ use axum::Json;
 use axum::{extract::Path, extract::State, response::IntoResponse, routing::get, Router};
 use csv;
 use serde::Serialize;
+use smartstring::alias::CompactString;
+
 use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Instant;
@@ -13,16 +15,16 @@ use std::time::Instant;
 // are never accessed
 #[allow(dead_code)]
 struct StopTime {
-    trip_id: String,
-    stop_id: String,
-    arrival: String,
-    departure: String,
+    trip_id: CompactString,
+    stop_id: CompactString,
+    arrival: CompactString,
+    departure: CompactString,
 }
 
 struct Trip {
-    trip_id: String,
-    route_id: String,
-    service_id: String,
+    trip_id: CompactString,
+    route_id: CompactString,
+    service_id: CompactString,
 }
 
 #[derive(Debug, Serialize)]
@@ -42,9 +44,9 @@ struct ScheduleResponse<'data> {
 
 struct Data {
     trips: Vec<Trip>,
-    trips_ix_by_route: HashMap<String, Vec<usize>>,
+    trips_ix_by_route: HashMap<CompactString, Vec<usize>>,
     stop_times: Vec<StopTime>,
-    stop_times_ix_by_trip: HashMap<String, Vec<usize>>,
+    stop_times_ix_by_trip: HashMap<CompactString, Vec<usize>>,
 }
 
 #[tokio::main]
@@ -74,7 +76,7 @@ async fn schedule_handler(
 ) -> axum::response::Response {
     let resp: Vec<TripResponse> = data
         .trips_ix_by_route
-        .get(&route_id)
+        .get(&CompactString::from(route_id))
         .unwrap_or(&Vec::new())
         .iter()
         .map(|trip_ix| {
@@ -104,7 +106,7 @@ async fn schedule_handler(
     Json(resp).into_response()
 }
 
-fn get_stop_times() -> (Vec<StopTime>, HashMap<String, Vec<usize>>) {
+fn get_stop_times() -> (Vec<StopTime>, HashMap<CompactString, Vec<usize>>) {
     let now = Instant::now();
     let mut rdr = csv::ReaderBuilder::new()
         .has_headers(false)
@@ -129,13 +131,13 @@ fn get_stop_times() -> (Vec<StopTime>, HashMap<String, Vec<usize>>) {
         }
     }
 
-    let mut stop_time_by_trip: HashMap<String, Vec<usize>> = HashMap::new();
+    let mut stop_time_by_trip = HashMap::new();
     let mut ix: usize = 0;
 
     let mut stop_times: Vec<StopTime> = Vec::with_capacity(2_000_000);
     for result in rdr_iter {
         let record = result.expect("CSV record");
-        let trip_id: String = record.get(0).expect("row trip").into();
+        let trip_id = CompactString::from(String::from(record.get(0).expect("row trip")));
 
         let trips = stop_time_by_trip
             .entry(trip_id.clone())
@@ -161,7 +163,7 @@ fn get_stop_times() -> (Vec<StopTime>, HashMap<String, Vec<usize>>) {
     return (stop_times, stop_time_by_trip);
 }
 
-fn get_trips() -> (Vec<Trip>, HashMap<String, Vec<usize>>) {
+fn get_trips() -> (Vec<Trip>, HashMap<CompactString, Vec<usize>>) {
     let now = Instant::now();
     let mut rdr = csv::ReaderBuilder::new()
         .has_headers(false)
@@ -186,12 +188,12 @@ fn get_trips() -> (Vec<Trip>, HashMap<String, Vec<usize>>) {
     }
 
     let mut trips: Vec<Trip> = Vec::with_capacity(2_000_000);
-    let mut trip_by_route: HashMap<String, Vec<usize>> = HashMap::new();
+    let mut trip_by_route = HashMap::new();
 
     let mut ix: usize = 0;
     for result in rdr_iter {
         let record = result.expect("CSV record");
-        let route_id: String = record.get(0).expect("row route").into();
+        let route_id = CompactString::from(String::from(record.get(0).expect("row route")));
         let e = trip_by_route.entry(route_id.clone()).or_insert(Vec::new());
         e.push(ix);
         trips.push(Trip {

CosmicHorrorDev avatar Oct 23 '22 19:10 CosmicHorrorDev