django-elasticsearch-dsl-drf icon indicating copy to clipboard operation
django-elasticsearch-dsl-drf copied to clipboard

highlights is getting dropped in DocumentSerializer

Open umaparvat opened this issue 1 year ago • 3 comments

Hi All,

i'm using Django-elasticseach-dsl-drf document serializer for serializing the elastic search document. The requirement is to show the matching query in the response. So i've used highlight option in django-elasticsearch-dsl Search() in views.py. i can see the response with highlights. But when the response comes for deserialization, the key and value is getting dropped.

` class IncidentDocumentSerializer(DocumentSerializer):

  highlights = serializers.SerializerMethodField()

  class Meta:
      document = IncidentDocument

      fields = (
          'incident_number',
          'incident_state',
          'site_id',
          'sub_site_id',
          'account_id',
          'sub_account_id',
          'closed_at',
          'state',
          'short_description',
          'description',
          'incident_parent_id'

      )

  def get_highlights(self, obj):
      try:
          obj.highlights.to_dict()
      except Exception as e:
          print(e, obj.highlights, type(obj.highlights)) # the output in debug mode: exce 'IncidentDocument' object has no attribute 'highlights'

          return {}

`

views.py ` class PaginatedElasticSearchAPIView(APIView, LimitOffsetPagination): serializer_class = None document_class = None ins = None

@abc.abstractmethod
def generate_q_expressions(self, query):
    raise NotImplementedError

def get_search_instance(self, query):
    q = self.ins.generate_q_expressions(query)
    search = self.ins.document_class.search().highlight("*", require_field_match=False).extra(size=10000).query(q)
    return search

def get(self, request, query):
    try:
        search = self.get_search_instance(query)
        response = search.execute()
        for ind, each_data in enumerate(response.hits.hits):
            highlight = each_data.highlight
            for key in highlight:
                #  print(key, highlight[key])
                response.hits.hits[ind][key] = highlight[key][0]
                #print(response.hits.hits[ind][key])

        results = self.paginate_queryset(response, request, view=self)
        #  print("res", len(results))
        serializer = self.ins.serializer_class(results, many=True)

        #print(type(serializer.data))
        return self.get_paginated_response(serializer.data)
    except Exception as e:
        print("exce", e)
        return HttpResponse(e, status=500)

class SearchFactory:

def get_ins(self, type):
    if type == "incident":
        return SearchIncidents()
    elif type == "case":
        return SearchCases()
    elif type == "tac":
        return SearchTacSupportCase()
    else:
        return SearchAll()

class GlobalSearch(PaginatedElasticSearchAPIView): def get(self, request, query): ins = SearchFactory().get_ins(request.GET.get("type", "empty")) self.ins = ins return super().get(request, query)

class SearchIncidents: serializer_class = IncidentDocumentSerializer document = IncidentDocument document_class = IncidentDocument

def generate_q_expressions(self, query):
    return Q(
        'multi_match',
        query=query,
        fields=[
            'incident_number',
            'incident_parent_id'
            'urgency',
            'impact',
            'account_id',
            'site_id',
            'short_description',
            'description',
            'sub_site_id'
        ],
        fuzziness='auto'
    )

`

i've to replace the highlight field and value with document field and value . the serialiser drops the highlight field. This has performance impact. The below code part in the views.py

so i modified the response obj replacing the document field with the corresponding highlight field. In that case, the value which has tag is replaced as normal.

Printed the message before deserialisation whether document key is replaced with highlight key. it has tag. Device <em>AP</em> AP1_812 on Controller AP Noise Floor in the last 15 mins

After desierialisation, the output shows. Device AP AP1_812 on Controller AP Noise Floor in the last 15 mins

the API response from Elastic search for your reference.

Elastic search query: GET itsm_incidents/_search { "query": { "multi_match" : { "query": "AP SP", "fields": [ "incident_number", "description", "short_description" , "incident_number.suggest"] } }, "track_total_hits": true, "highlight": { "require_field_match": false, "fields": { "*": {} } } } Elastic Search response. { "_index": "itsm_incidents", "_id": "INC0061201", "_score": 2.7809696, "_source": { "incident_number": "INC0061201", "incident_parent_id": {}, "incident_state": "In Progress", "site_id": "ACCT0022395", "sub_site_id": "ACCT0022410", "account_id": "ACCT0022391", "sub_account_id": "ACCT0022394", "closed_at": null, "state": "In Progress", "short_description": "ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap memory utilization ap", "description": "Memory utilization for AP MSAN-AP-1 with serial CNG2CW has been above 1% for about 5 minutes since 2022-05-24 00:07:07 UTC" }, "highlight": { "short_description": [ "<em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization", "<em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em> memory utilization <em>ap</em>" ], "description": [ "Memory utilization for <em>AP</em> MSAN-<em>AP</em>-1 with serial CNG2CW has been above 1% for about 5 minutes" ] } },

  1. why highlight is being dropped.
  2. why the tag in the text field is getting dropped when using Document serializer ?
  3. How to replace the highlight matched fields with the document fields in serializer ?

umaparvat avatar Sep 20 '23 12:09 umaparvat