django-ninja
django-ninja copied to clipboard
Data validation takes too long?
- python:3.11.7
- django 4.2.8
- django-ninja:1.1.0
- system:ubuntu 22.04
I calculate validation time like here
One of my api logs that:Total time: 0.1214s. Validation time: 0.1213s.
My model define:
class Messages(models.Model):
id = models.BigAutoField(primary_key=True)
create_time = models.DateTimeField(auto_now_add=True,)
update_time = models.DateTimeField(auto_now=True)
from_user_id = models.IntegerField()
from_user_avatar_url = models.CharField(max_length=256, default="",)
to_user_id = models.IntegerField()
category: MESSAGE_TYPE = models.CharField(
max_length=8,
choices=MESSAGE_TYPE_CHOICE,
help_text="、".join(MESSAGE_TYPE_LIST)) # type: ignore
template_name = models.CharField(max_length=32)
template_id = models.IntegerField()
title = models.CharField( max_length=20)
view_url = models.CharField( max_length=256)
content = models.CharField(max_length=2048)
publish_time = models.DateTimeField()
is_read = models.BooleanField(default=False)
is_interactive = models.BooleanField(default=False)
is_processed = models.BooleanField(default=False)
extra_json = models.JSONField(default=dict )
is_deleted = models.BooleanField(default=False)
class Meta:
ordering = ("-create_time",)
verbose_name = "messages"
verbose_name_plural = verbose_name
input scheme is:
class MessagesIn(ModelSchema):
publish_time: int = Field(...,
gt=1700571683,
lt=6000000000 )
to_user_ids: list[int] = Field(..., min_length=1)
class Meta:
model = Messages
fields = ["from_user_id", "from_user_avatar_url", "category",
"template_name", "template_id", "title", "view_url",
"content", "is_interactive", "is_processed", "extra_json"]
api define is:
@api.post("messages", response={200: None})
@track_time
async def publish_messages(request: ASGIRequest, message_in: MessagesIn):
publish_time = message_in.publish_time
msg_dict = message_in.dict()
msg_dict.pop("to_user_ids")
msg_dict.pop("publish_time")
msgs = [Messages(**msg_dict,
to_user_id=to_user_id,
publish_time=dt.utcfromtimestamp(publish_time))
for to_user_id in message_in.to_user_ids]
assert channel_layer is not None
for msg in msgs:
await msg.asave(force_insert=True)
await channel_layer.group_send(
f"group_{msg.to_user_id}",
{"type": "publish.message",
"message": MessageOut.from_orm(msg).dict()})
return 200, None
Why does data validation require 120 milliseconds?How to optimize it to 20ms
I think there's an issue with your timing decorator (perhaps related to async?), as it seems highly unlikely that you are doing everything in that api function (inc. multiple model saves) in 0.1 ms.
I think there's an issue with your timing decorator (perhaps related to async?), as it seems highly unlikely that you are doing everything in that api function (inc. multiple model saves) in 0.1 ms.
The observation time from the HTTP client is also very long
@shimachao
did you try measuring time without your logic but only validation?
@api.post("messages", response={200: None})
@track_time
async def publish_messages(request: ASGIRequest, message_in: MessagesIn):
# Do nothing
return 200, None
@shimachao
did you try measuring time without your logic but only validation?
@api.post("messages", response={200: None}) @track_time async def publish_messages(request: ASGIRequest, message_in: MessagesIn): # Do nothing return 200, None
I delete async word
@api.post("messages", response={200: None},)
@track_time
def publish_messages(request: ASGIRequest, message_in: MessagesIn):
return 200, None
Total time: 0.1335s. Validation time: 0.1335s.
@shimachao
Well I took your code - and my results are x500 faster than yours - are you sure you shared all the code ?
here is my log:
Total time: 0.0004s. Validation time: 0.0003s.
Here I a full code I test with:
class MessagesIn(ModelSchema):
publish_time: int = Field(..., gt=1700571683, lt=6000000000)
to_user_ids: list[int] = Field(..., min_length=1)
class Meta:
model = Messages
fields = [
"from_user_id",
"from_user_avatar_url",
"category",
"template_name",
"template_id",
"title",
"view_url",
"content",
"is_interactive",
"is_processed",
"extra_json",
]
def request_wrapper(func):
# Logs and prints time by request
@wraps(func)
def wrapper(request, *args, **kwargs):
request._req_start = time.monotonic()
response = func(request, *args, **kwargs)
total_time = time.monotonic() - request._req_start
# if not hasattr(request, '_operation_start'):
# request._operation_start = request._req_start
validation_time = request._operation_start - request._req_start
print(f'Total time: {total_time:0.4f}s. Validation time: {validation_time:0.4f}s.')
return response
return wrapper
def operation_wrapper(func):
# Logs time when operation is called (after validation)
@wraps(func)
def wrapper(request, *args, **kwargs):
request._operation_start = time.monotonic()
return func(request, *args, **kwargs)
return wrapper
def track_time(func):
func = operation_wrapper(func)
func = decorate_view(request_wrapper)(func)
return func
api = NinjaAPI()
@api.post("messages")
@track_time
def publish_messages(request, message_in: MessagesIn):
# Do nothing
return None
also - do you repeat your calls - as first request after start usually takes longer time if you have large projects (While all the imports and data are cached)
@shimachao
Well I took your code - and my results are x500 faster than yours - are you sure you shared all the code ?
here is my log:
Total time: 0.0004s. Validation time: 0.0003s.
Here I a full code I test with:
class MessagesIn(ModelSchema): publish_time: int = Field(..., gt=1700571683, lt=6000000000) to_user_ids: list[int] = Field(..., min_length=1) class Meta: model = Messages fields = [ "from_user_id", "from_user_avatar_url", "category", "template_name", "template_id", "title", "view_url", "content", "is_interactive", "is_processed", "extra_json", ] def request_wrapper(func): # Logs and prints time by request @wraps(func) def wrapper(request, *args, **kwargs): request._req_start = time.monotonic() response = func(request, *args, **kwargs) total_time = time.monotonic() - request._req_start # if not hasattr(request, '_operation_start'): # request._operation_start = request._req_start validation_time = request._operation_start - request._req_start print(f'Total time: {total_time:0.4f}s. Validation time: {validation_time:0.4f}s.') return response return wrapper def operation_wrapper(func): # Logs time when operation is called (after validation) @wraps(func) def wrapper(request, *args, **kwargs): request._operation_start = time.monotonic() return func(request, *args, **kwargs) return wrapper def track_time(func): func = operation_wrapper(func) func = decorate_view(request_wrapper)(func) return func api = NinjaAPI() @api.post("messages") @track_time def publish_messages(request, message_in: MessagesIn): # Do nothing return None
also - do you repeat your calls - as first request after start usually takes longer time if you have large projects (While all the imports and data are cached)
Yes, I repeat many times, it's always too slow. I can't shar all code, but my INSTALLED_APPS may provide more information
INSTALLED_APPS = [
"daphne",
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
# 3rd
"corsheaders",
"channels",
"ninja",
# local
"messages",
]
@shimachao
I do not have explanation
-
do you repeat your calls ? - like maybe only first is slow but the rest are fine
-
try from empty project - without daphne/channels - maybe you got some broker that is slow to connect
-
make sure you not including your await code - as it has potential bottlenecks:
like here
for msg in ...:
await msg.asave(force_insert=True)
await channel_layer.group_send(
f"group_{msg.to_user_id}",
{"type": "publish.message",
"message": MessageOut.from_orm(msg).dict()})
if do not need eventual consistency you may run this code more concurently/faster
tasks = []
for msg in ...
tasks.append(msg.asave(force_insert=True))
tasks.append(
channel_layer.group_send(
f"group_{msg.to_user_id}",
{"type": "publish.message",
"message": MessageOut.from_orm(msg).dict()})
)
await asyncio.gather(*tasks)