qdrant-client
qdrant-client copied to clipboard
new: decode json responses with orjson
Deploy Preview for poetic-froyo-8baba7 ready!
| Name | Link |
|---|---|
| Latest commit | c065773a5f02918283d7c8b6f444f828713cdfaf |
| Latest deploy log | https://app.netlify.com/sites/poetic-froyo-8baba7/deploys/67b32771f9558d0008040d0d |
| Deploy Preview | https://deploy-preview-902--poetic-froyo-8baba7.netlify.app |
| Preview on mobile | Toggle QR Code...Use your smartphone camera to open QR code link. |
To edit notification comments on pull requests, go to your Netlify site configuration.
out of curiosity, how much faster is this?
out of curiosity, how much faster is this?
@agourlay I am about to post a note here on it
But as of now some of the common responses can be decoded up to 3x faster (the gain applies only to requests with with_payload=True or with_vectors=True, since the change only affects response decoding)
I benchmarked it on scroll queries with with_vectors=True and with_payload=True for a limit of 5, 10, 20 and 30 records
I used randomly generated vectors with dim = 512 and dim = 1024, and generated payloads with a fixture from the tests. The benchmarking results cover both the JSON decoding step alone (response.json()) and the whole scroll request.
The results are:
{
"decode": {
"512": {
"5": {
"json": 4.8874993808567524e-05,
"orjson": 2.366595435887575e-05
},
"10": {
"json": 7.658399408683181e-05,
"orjson": 4.395801806822419e-05
},
"20": {
"json": 0.0020279159653000534,
"orjson": 0.0007863339851610363
},
"30": {
"json": 0.00019866699585691094,
"orjson": 8.283299393951893e-05
}
},
"1024": {
"5": {
"json": 5.583302117884159e-05,
"orjson": 2.7542002499103546e-05
},
"10": {
"json": 6.133399438112974e-05,
"orjson": 2.8709007892757654e-05
},
"20": {
"json": 0.00012050004443153739,
"orjson": 4.754198016598821e-05
},
"30": {
"json": 0.00016829196829348803,
"orjson": 6.799999391660094e-05
}
}
},
"scroll": {
"512": {
"5": {
"json": 0.0043065829668194056,
"orjson": 0.0034446659847162664
},
"10": {
"json": 0.002439292031340301,
"orjson": 0.0023802079958841205
},
"20": {
"json": 0.0047377090086229146,
"orjson": 0.0032056660274975
},
"30": {
"json": 0.003125959017779678,
"orjson": 0.0026631250511854887
}
},
"1024": {
"5": {
"json": 0.00181554100709036,
"orjson": 0.0019507919787429273
},
"10": {
"json": 0.001797415956389159,
"orjson": 0.0016768340137787163
},
"20": {
"json": 0.0022662909468635917,
"orjson": 0.0019526249961927533
},
"30": {
"json": 0.0038724589976482093,
"orjson": 0.003087958029936999
}
}
}
}
I generated the input with:
from typing import Optional, Any
import numpy as np
import json
from tests.fixtures.payload import one_random_payload_please
NUM_RECORDS = 1_000
def generate_numpy(output_file: Optional[str] = None, dim: int = 512) -> np.ndarray:
    """Generate a (NUM_RECORDS, dim) array of random floats in [0, 1).

    If *output_file* is given, the array is also saved there in .npy format.
    Returns the generated array either way.
    """
    vectors = np.random.random((NUM_RECORDS, dim))
    if output_file is None:
        return vectors
    with open(output_file, 'wb') as out:
        np.save(out, vectors)
    return vectors
def generate_json(output_file: Optional[str] = None) -> list[dict[str, Any]]:
    """Generate NUM_RECORDS random payload dicts from the test fixture.

    If *output_file* is given, each payload is also written to it as one
    JSON Lines record; 'rand_datetime' values are stringified first because
    datetime objects are not JSON-serializable.

    Returns the list of generated payloads.
    """
    if output_file is None:
        # No serialization needed, so no datetime conversion either.
        return [one_random_payload_please(i) for i in range(NUM_RECORDS)]

    data = []
    with open(output_file, 'w') as f:
        for i in range(NUM_RECORDS):
            record = one_random_payload_please(i)
            if 'rand_datetime' in record:
                # json.dumps cannot handle datetime; store its str() form.
                record['rand_datetime'] = str(record['rand_datetime'])
            try:
                f.write(json.dumps(record) + '\n')
            except Exception:
                # Dump the offending record for debugging, then re-raise.
                from pprint import pprint
                pprint(record)
                raise  # bare raise preserves the original traceback
            data.append(record)
    return data
if __name__ == '__main__':
    # Produce the benchmark fixtures: one JSONL payload file plus one
    # vector file per tested dimensionality.
    generate_json('payload.jsonl')
    for vector_dim in (512, 1024):
        generate_numpy(f'vectors_{vector_dim}.npy', dim=vector_dim)
Going to add benchmarking results with limit = 100, 1_000 and 10_000
{
"decode": {
"512": {
"100": {
"json": 0.002053125004749745,
"orjson": 0.0008660830208100379
},
"1000": {
"json": 0.016057875007390976,
"orjson": 0.0042370830196887255
},
"10000": {
"json": 0.23768579197349027,
"orjson": 0.1923259579925798
}
},
"1024": {
"100": {
"json": 0.0010743339662440121,
"orjson": 0.000690957996994257
},
"1000": {
"json": 0.0073619160102680326,
"orjson": 0.0038023749948479235
},
"10000": {
"json": 0.26379175001056865,
"orjson": 0.22926875000121072
}
}
},
"scroll": {
"512": {
"100": {
"json": 0.00935820903396234,
"orjson": 0.00594179198378697
},
"1000": {
"json": 0.03989037498831749,
"orjson": 0.023745999962557107
},
"10000": {
"json": 0.4403287079767324,
"orjson": 0.37456325005041435
}
},
"1024": {
"100": {
"json": 0.005677750043105334,
"orjson": 0.0063817090122029185
},
"1000": {
"json": 0.026618165953550488,
"orjson": 0.027344707981683314
},
"10000": {
"json": 0.4460842920234427,
"orjson": 0.4439006670145318
}
}
}
}
Out of curiosity, have you considered evaluating msgspec for this?
Could orjson be hidden behind feature flag? something like http-perf maybe. Not everyone uses http client