python_backend
Add back the 24.05 response-sending path
Before this change:
"infbench_summary": {
"Requests": 1000,
"Requests/s": 56.781948247187536,
"E2E Latency(s)": 17.61123087300075,
"Avg Latency(s)": 16.54096719381794,
"Total Input Tokens": 127000,
"Total Output Tokens": 128000,
"Avg. Input Tokens / Request": 127.0,
"Avg. Output Tokens / Request": 128.0,
"Stddev Output Tokens / Request": 0.0,
"Input Tokens/s": 7211.307427392818,
"Output Tokens/s": 7268.089375640005,
"Request Issues/s": 2999.715792703272,
"Avg TTFT(s)": 2.12225890412401,
"Avg TPOT(s)": 0.1135331361393223
}
After this change:
"infbench_summary": {
"Requests": 1000,
"Requests/s": 67.5828421712087,
"E2E Latency(s)": 14.796655007001391,
"Avg Latency(s)": 12.20483882850504,
"Total Input Tokens": 127000,
"Total Output Tokens": 128000,
"Avg. Input Tokens / Request": 127.0,
"Avg. Output Tokens / Request": 128.0,
"Stddev Output Tokens / Request": 0.0,
"Input Tokens/s": 8583.020955743505,
"Output Tokens/s": 8650.603797914713,
"Request Issues/s": 3186.022902867275,
"Avg TTFT(s)": 2.106685652077096,
"Avg TPOT(s)": 0.07951301713722789
}
24.05 (for comparison):
"infbench_summary": {
"Requests": 1000,
"Requests/s": 66.70461264839375,
"E2E Latency(s)": 14.991467011001077,
"Avg Latency(s)": 13.56315037568501,
"Total Input Tokens": 127000,
"Total Output Tokens": 128000,
"Avg. Input Tokens / Request": 127.0,
"Avg. Output Tokens / Request": 128.0,
"Stddev Output Tokens / Request": 0.0,
"Input Tokens/s": 8471.485806346005,
"Output Tokens/s": 8538.1904189944,
"Request Issues/s": 2837.3298150774403,
"Avg TTFT(s)": 2.1147114573539767,
"Avg TPOT(s)": 0.09014518833331522
}