django-elasticsearch-dsl-drf
django-elasticsearch-dsl-drf copied to clipboard
Conditional filtering
Hi,
I have a document/index 'companies' which contains multiple annual_accounts
entities which contain multiple fields
and a closing_year
. Each fields
has a value
and a code
.
I want to retrieve companies which have an annual_accounts.closing_year
equals to 2020 AND for this annual_accounts
element a fields
with code "AA" AND for the same fields
a value greater than 1,000,000.
Here is the elasticsearch query generated:
GET companies/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "annual_accounts.fields",
"query": {
"terms": {
"annual_accounts.fields.code.raw": [
"AA"
]
}
}
}
},
{
"nested": {
"path": "annual_accounts",
"query": {
"terms": {
"annual_accounts.closing_year": [
"2020"
]
}
}
}
},
{
"nested": {
"path": "annual_accounts.fields",
"query": {
"range": {
"annual_accounts.fields.value": {
"gte": "1000000"
}
}
}
}
}
]
}
},
"track_total_hits": true,
"from": 0,
"size": 25
}
What is the best way to do this kind of filtering?
Is the way is to create an index for fields
to being able to filter the 2 conditions (code
and value
)? If this is a solution how can I link the field to an annual_accounts
and a company
in another index?
I reworked my nested document to combined the closing_year
and the code
in the same field to filter just once. The given request is like this:
GET companies/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "annual_accounts.fields",
"query": {
"terms": {
"annual_accounts.fields.code.raw": [
"2020:AA"
]
}
}
}
},
{
"nested": {
"path": "annual_accounts.fields",
"query": {
"range": {
"annual_accounts.fields.value": {
"gte": "1000000"
}
}
}
}
}
]
}
},
"track_total_hits": true,
"from": 0,
"size": 25
}
After manually searching with the dev_tools in kibana, my taget request will be this:
GET companies/_search
{
"query": {
"nested": {
"path": "annual_accounts.fields",
"query": {
"bool": {
"must": [
{
"range": {
"annual_accounts.fields.value": {
"gte": "1000000"
}
}
},
{
"terms": {
"annual_accounts.fields.code.raw": [
"2020:AA"
]
}
}
]
}
}
}
},
"track_total_hits": true,
"from": 0,
"size": 25
}
Is it possible to achieve with the current implementation of NesteedFilter or should I create a custom filter to do this?
I made a new FilterBackend to be able to group filters on a specific nested, here is the code. Let me know if you think it could be integrated in the lib.
New group
key in the nested_filter_fields
nested_filter_fields = {
"annual_accounts.fields.code": {
"field": "annual_accounts.fields.code.raw",
"path": "annual_accounts.fields",
"group": "annual_accounts.fields",
},
"annual_accounts.fields.value": {
"field": "annual_accounts.fields.value",
"path": "annual_accounts.fields",
"group": "annual_accounts.fields",
},
...
GroupNestedFilteringFilterBackend
class GroupNestedFilteringFilterBackend(NestedFilteringFilterBackend):
def get_filter_query_params(self, request, view):
query_params = request.query_params.copy()
filter_query_params = {}
filter_fields = self.prepare_filter_fields(view)
for query_param in query_params:
query_param_list = self.split_lookup_filter(query_param, maxsplit=1)
field_name = query_param_list[0]
if field_name in filter_fields:
lookup_param = None
if len(query_param_list) > 1:
lookup_param = query_param_list[1]
valid_lookups = filter_fields[field_name]["lookups"]
nested_path = self.get_filter_field_nested_path(
filter_fields, field_name
)
if lookup_param is None or lookup_param in valid_lookups:
values = [
__value.strip()
for __value in query_params.getlist(query_param)
if __value.strip() != ""
]
if values:
filter_query_params[query_param] = {
"lookup": lookup_param,
"values": values,
"field": filter_fields[field_name].get("field", field_name),
"type": view.mapping,
"path": nested_path,
"group": filter_fields[field_name].get("group"),
}
return filter_query_params
def filter_queryset(self, request, queryset, view):
filter_query_params = self.get_filter_query_params(request, view)
groups = {}
for options in filter_query_params.values():
if options["group"] and options["group"] not in groups:
groups[options["group"]] = {
"path": options["path"],
"filters": [],
}
if (
isinstance(options["values"], (list, tuple))
and options["lookup"] is None
):
if not options["group"]:
queryset = self.apply_filter_terms(
queryset, options, options["values"]
)
continue
if isinstance(options["values"], (list, tuple)):
values = options["values"]
else:
values = self.split_lookup_complex_value(options["values"])
groups[options["group"]]["filters"].append(
Q("terms", **{options["field"]: values})
)
continue
for value in options["values"]:
if options["lookup"] == LOOKUP_FILTER_TERMS:
if not options["group"]:
queryset = self.apply_filter_terms(queryset, options, value)
continue
if isinstance(value, (list, tuple)):
values = value
else:
values = self.split_lookup_complex_value(value)
groups[options["group"]]["filters"].append(
Q("terms", **{options["field"]: values})
)
elif options["lookup"] in (
LOOKUP_FILTER_PREFIX,
LOOKUP_QUERY_STARTSWITH,
):
if not options["group"]:
queryset = self.apply_filter_prefix(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q("prefix", **{options["field"]: value})
)
elif options["lookup"] == LOOKUP_FILTER_RANGE:
if not options["group"]:
queryset = self.apply_filter_range(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q("range", **{options["field"]: self.get_range_params(value)})
)
elif options["lookup"] == LOOKUP_FILTER_REGEXP:
if not options["group"]:
queryset = self.apply_filter_regexp(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q("regexp", **{options["field"]: value})
)
elif options["lookup"] == LOOKUP_FILTER_EXISTS:
if not options["group"]:
queryset = self.apply_query_exists(queryset, options, value)
continue
value_lower = value.lower()
if value_lower in TRUE_VALUES:
groups[options["group"]]["filters"].append(
Q("exists", field=options["field"])
)
elif value_lower in FALSE_VALUES:
groups[options["group"]]["filters"].append(
~Q("exists", field=options["field"])
)
return queryset
elif options["lookup"] == LOOKUP_FILTER_WILDCARD:
if not options["group"]:
queryset = self.apply_query_wildcard(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q("wildcard", **{options["field"]: value})
)
elif options["lookup"] == LOOKUP_QUERY_CONTAINS:
if not options["group"]:
queryset = self.apply_query_contains(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q("wildcard", **{options["field"]: "*{}*".format(value)})
)
elif options["lookup"] == LOOKUP_QUERY_IN:
if not options["group"]:
queryset = self.apply_query_in(queryset, options, value)
continue
values = self.split_lookup_complex_value(value)
queries = []
for vl in values:
queries.append(Q("term", **{options["field"]: vl}))
if queries:
groups[options["group"]]["filters"].append(
six.moves.reduce(operator.or_, queries)
)
elif options["lookup"] == LOOKUP_QUERY_GT:
if not options["group"]:
queryset = self.apply_query_gt(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q(
"range",
**{options["field"]: self.get_gte_lte_params(value, "gt")},
)
)
elif options["lookup"] == LOOKUP_QUERY_GTE:
if not options["group"]:
queryset = self.apply_query_gte(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q(
"range",
**{options["field"]: self.get_gte_lte_params(value, "gte")},
)
)
elif options["lookup"] == LOOKUP_QUERY_LT:
if not options["group"]:
queryset = self.apply_query_lt(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q(
"range",
**{options["field"]: self.get_gte_lte_params(value, "lt")},
)
)
elif options["lookup"] == LOOKUP_QUERY_LTE:
if not options["group"]:
queryset = self.apply_query_lte(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q(
"range",
**{options["field"]: self.get_gte_lte_params(value, "lte")},
)
)
elif options["lookup"] == LOOKUP_QUERY_ENDSWITH:
if not options["group"]:
queryset = self.apply_query_endswith(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q("wildcard", **{options["field"]: "*{}".format(value)})
)
elif options["lookup"] == LOOKUP_QUERY_ISNULL:
if not options["group"]:
queryset = self.apply_query_isnull(queryset, options, value)
continue
value_lower = value.lower()
if value_lower in TRUE_VALUES:
groups[options["group"]]["filters"].append(
~Q("exists", field=options["field"])
)
elif value_lower in FALSE_VALUES:
groups[options["group"]]["filters"].append(
Q("exists", field=options["field"])
)
elif options["lookup"] == LOOKUP_QUERY_EXCLUDE:
if not options["group"]:
queryset = self.apply_query_exclude(queryset, options, value)
continue
values = self.split_lookup_complex_value(value)
queries = []
for vl in values:
queries.append(~Q("term", **{options["field"]: vl}))
if queries:
groups[options["group"]]["filters"].append(
six.moves.reduce(operator.or_, queries)
)
else:
if not options["group"]:
queryset = self.apply_filter_term(queryset, options, value)
continue
groups[options["group"]]["filters"].append(
Q(
"term",
**{options["field"]: value},
)
)
for group in groups.values():
if group["filters"]:
queryset = queryset.query(
"nested",
path=group["path"],
query=Q("bool", must=group["filters"]),
)
return queryset