httpx icon indicating copy to clipboard operation
httpx copied to clipboard

Improve async performance.

Open MarkusSintonen opened this issue 8 months ago • 24 comments

There seem to be some performance issues in httpx (0.27.0): it performs much worse than aiohttp (3.9.4) when running requests concurrently (on Python 3.12). The following benchmark shows that running 20 requests concurrently is over 10x slower with httpx than with aiohttp. The benchmark uses very basic httpx calls to perform multiple GET requests with limited concurrency. The script outputs a figure showing that the duration of individual GET requests varies hugely with httpx.

Figure_1

# requirements.txt:
# httpx == 0.27.0
# aiohttp == 3.9.4
# matplotlib == 3.9.0
# 
# 1. start server: python bench.py server
# 2. run client test: python bench.py client

import asyncio
import sys
from typing import Any, Coroutine, Iterator
import aiohttp
import time
import httpx
from aiohttp import web
import matplotlib.pyplot as plt


# TCP port the local benchmark server listens on and the clients connect to.
PORT = 1234
# Endpoint requested by both the httpx and the aiohttp client.
URL = f"http://localhost:{PORT}/req"
# Response body served for every request (2000 characters).
RESP = "a" * 2000
# Number of GET requests issued per client in each phase (warmup and measured).
REQUESTS = 100
# Maximum number of requests allowed in flight during the measured phase.
CONCURRENCY = 20


def run_web_server():
    """Serve the fixed ``RESP`` body on ``GET /req`` at ``localhost:PORT``.

    Builds an aiohttp application with a single route and hands it to
    ``web.run_app``.
    """
    async def respond(_request):
        # Every request gets the same constant text payload.
        return web.Response(text=RESP)

    routes = [web.get('/req', respond)]
    server = web.Application()
    server.add_routes(routes)
    web.run_app(server, host="localhost", port=PORT)


def duration(start: float) -> int:
    """Return the milliseconds elapsed since ``start``, truncated to an int.

    ``start`` is expected to be an earlier ``time.monotonic()`` reading.
    """
    elapsed_seconds = time.monotonic() - start
    return int(elapsed_seconds * 1000)


async def run_requests(axis: plt.Axes):
    """Benchmark httpx against aiohttp and plot per-request durations on ``axis``.

    For each client the same procedure runs: a warmup round of ``REQUESTS``
    GETs gathered without a concurrency limit, then a measured round of
    ``REQUESTS`` GETs with at most ``CONCURRENCY`` in flight. The measured
    per-request durations (ms) are plotted, and the total wall time of the
    measured round is put into the line's legend label.
    """
    async def gather_limited_concurrency(coros: Iterator[Coroutine[Any, Any, Any]]):
        # A shared semaphore caps how many of the coroutines run at once.
        limiter = asyncio.Semaphore(CONCURRENCY)

        async def guarded(pending):
            async with limiter:
                return await pending

        wrapped = [guarded(pending) for pending in coros]
        return await asyncio.gather(*wrapped)

    async def httpx_get(session: httpx.AsyncClient, timings: list[int]):
        # One timed GET via httpx; the duration is recorded only after the
        # body length and status code have been checked.
        began = time.monotonic()
        response = await session.request("GET", URL)
        payload = await response.aread()
        assert len(payload) == len(RESP)
        assert response.status_code == 200, f"status_code={response.status_code}"
        timings.append(duration(began))

    async def aiohttp_get(session: aiohttp.ClientSession, timings: list[int]):
        # One timed GET via aiohttp; the duration is recorded after the
        # response context manager has exited.
        began = time.monotonic()
        async with session.request("GET", URL) as response:
            payload = await response.read()
            assert len(payload) == len(RESP)
            assert response.status == 200, f"status={response.status}"
        timings.append(duration(began))

    async with httpx.AsyncClient() as session:
        # warmup: results are discarded (each call gets a throwaway list)
        warmup = [httpx_get(session, []) for _ in range(REQUESTS)]
        await asyncio.gather(*warmup)

        timings = []
        start = time.monotonic()
        measured = (httpx_get(session, timings) for _ in range(REQUESTS))
        await gather_limited_concurrency(measured)
        xs = list(range(REQUESTS))
        label = f"httpx (tot={duration(start)}ms)"
        axis.plot(xs, timings, label=label)

    async with aiohttp.ClientSession() as session:
        # warmup: results are discarded (each call gets a throwaway list)
        warmup = [aiohttp_get(session, []) for _ in range(REQUESTS)]
        await asyncio.gather(*warmup)

        timings = []
        start = time.monotonic()
        measured = (aiohttp_get(session, timings) for _ in range(REQUESTS))
        await gather_limited_concurrency(measured)
        xs = list(range(REQUESTS))
        label = f"aiohttp (tot={duration(start)}ms)"
        axis.plot(xs, timings, label=label)


def main(mode: str):
    """Run the benchmark in the given ``mode``: ``"server"`` or ``"client"``.

    ``"server"`` starts the local web server. ``"client"`` runs the request
    benchmark and shows the resulting matplotlib figure. Any other value
    fails the mode assertion. Prints ``DONE`` once the chosen mode finishes.
    """
    assert mode in {"server", "client"}, f"invalid mode: {mode}"

    if mode != "server":
        # Client mode: collect timings onto a fresh axes, then display it.
        _figure, axes = plt.subplots()
        asyncio.run(run_requests(axes))
        plt.legend(loc="upper left")
        axes.set_xlabel("# request")
        axes.set_ylabel("[ms]")
        plt.show()
    else:
        run_web_server()

    print("DONE", flush=True)


if __name__ == "__main__":
    # Exactly one command-line argument is expected: the mode to run.
    argument_count = len(sys.argv)
    assert argument_count == 2, f"Usage: {sys.argv[0]} server|client"
    main(sys.argv[1])

I found the following issue, but it seems it's not related, as the workaround doesn't make a difference here: https://github.com/encode/httpx/issues/838#issuecomment-1291224189

MarkusSintonen avatar Jun 04 '24 09:06 MarkusSintonen