django-haystack
django-haystack copied to clipboard
Quoted search strings raise QueryError with whoosh backend
Searching for a quoted string like "foo bar" fails with a QueryError for me. Perhaps needless to say - it only happens when the search string is actually indexed.
I'm using a recent django-haystack development version and whoosh 2.4.1.
There's nothing special about my search index definition, it can be stripped down to:
class ContentIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.EdgeNgramField(document=True)
def get_model(self):
return Content
def index_queryset(self):
return self.get_model().objects.active()
def prepare_text(self, obj):
return '%s %s' % (obj.title, obj.subtitle)
Here's the traceback:
Request Method: GET
Request URL: http://127.0.0.1:8000/search/?q=%22web+3d%22
Django Version: 1.4
Python Version: 2.7.3
[...]
Traceback:
File "/path/to/django/core/handlers/base.py" in get_response
111. response = callback(request, *callback_args, **callback_kwargs)
File "/path/to/haystack/views.py" in __call__
50. return self.create_response()
File "/path/to/haystack/views.py" in create_response
130. (paginator, page) = self.build_page()
File "/path/to/haystack/views.py" in build_page
107. self.results[start_offset:start_offset + self.results_per_page]
File "/path/to/haystack/query.py" in __getitem__
261. self._fill_cache(start, bound)
File "/path/to/haystack/query.py" in _fill_cache
159. results = self.query.get_results(**kwargs)
File "/path/to/haystack/backends/__init__.py" in get_results
484. self.run(**kwargs)
File "/path/to/haystack/backends/__init__.py" in run
402. results = self.backend.search(final_query, **search_kwargs)
File "/path/to/haystack/backends/__init__.py" in wrapper
26. return func(obj, query_string, *args, **kwargs)
File "/path/to/haystack/backends/whoosh_backend.py" in search
388. raw_results = searcher.search(parsed_query, limit=end_offset, sortedby=sort_by, reverse=reverse)
File "/usr/lib/python2.7/dist-packages/whoosh/searching.py" in search
762. self.search_with_collector(q, c)
File "/usr/lib/python2.7/dist-packages/whoosh/searching.py" in search_with_collector
803. collector.set_subsearcher(subsearcher, offset)
File "/usr/lib/python2.7/dist-packages/whoosh/collectors.py" in set_subsearcher
152. self.matcher = self.q.matcher(subsearcher)
File "/usr/lib/python2.7/dist-packages/whoosh/query/positional.py" in matcher
136. % self.fieldname)
Exception Type: QueryError at /search/
Exception Value: Phrase search: 'text' field has no positions
This still happens with Django-haystack 2.2.0 and Whoosh 2.6.0. Every time I try to search for a quoted multi-word string, I get an exception:
Django Version: 1.6.5 Python Version: 2.6.9 Installed Applications: ('django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'south', 'template', 'suit', 'suit_ckeditor', 'adminsortable', 'personnel', 'haystack', 'django.contrib.admin', 'django.contrib.admindocs', 'menu', 'attachments', 'static_pages') Installed Middleware: ('django.middleware.common.CommonMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware')
Traceback:
File "/path_to_code/python2.6/site-packages/django/core/handlers/base.py" in get_response
112. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/path_to_app/personnel/views.py" in search
22. return _return_correct_results_template(request, personnel_results, departmental_results, on_campus, searchtext)
File "/path_to_app/personnel/views.py" in _return_correct_results_template
88. if not personnel_results and not departmental_results:
File "/path_to_code/python2.6/site-packages/haystack/query.py" in __len__
91. self._result_count = self.query.get_count()
File "/path_to_code/python2.6/site-packages/haystack/backends/__init__.py" in get_count
625. self.run()
File "/path_to_code/python2.6/site-packages/haystack/backends/__init__.py" in run
562. results = self.backend.search(final_query, **search_kwargs)
File "/path_to_code/python2.6/site-packages/haystack/backends/__init__.py" in wrapper
34. return func(obj, query_string, *args, **kwargs)
File "/path_to_code/python2.6/site-packages/haystack/backends/whoosh_backend.py" in search
424. **search_kwargs
File "/path_to_code/python2.6/site-packages/whoosh/searching.py" in search_page
640. results = self.search(query, limit=pagenum * pagelen, **kwargs)
File "/path_to_code/python2.6/site-packages/whoosh/searching.py" in search
787. self.search_with_collector(q, c)
File "/path_to_code/python2.6/site-packages/whoosh/searching.py" in search_with_collector
820. collector.run()
File "/path_to_code/python2.6/site-packages/whoosh/collectors.py" in run
143. self.set_subsearcher(subsearcher, offset)
File "/path_to_code/python2.6/site-packages/whoosh/collectors.py" in set_subsearcher
618. self.child.set_subsearcher(subsearcher, offset)
File "/path_to_code/python2.6/site-packages/whoosh/collectors.py" in set_subsearcher
171. self.matcher = self.q.matcher(subsearcher, self.context)
File "/path_to_code/python2.6/site-packages/whoosh/query/positional.py" in matcher
222. % self.fieldname)
Exception Type: QueryError at /personnel/search
Exception Value: Phrase search: 'text' field has no positions
Commenting out the code at line 222 makes the error go away, as long as the search returns no results. If the quoted search would return a result, however, this exception is thrown:
Traceback:
File "/path_to_code/python2.6/site-packages/django/core/handlers/base.py" in get_response
112. response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/path_to_app/personnel/views.py" in search
22. return _return_correct_results_template(request, personnel_results, departmental_results, on_campus, searchtext)
File "/path_to_app/personnel/views.py" in _return_correct_results_template
88. if not personnel_results and not departmental_results:
File "/path_to_code/python2.6/site-packages/haystack/query.py" in __len__
91. self._result_count = self.query.get_count()
File "/path_to_code/python2.6/site-packages/haystack/backends/__init__.py" in get_count
625. self.run()
File "/path_to_code/python2.6/site-packages/haystack/backends/__init__.py" in run
562. results = self.backend.search(final_query, **search_kwargs)
File "/path_to_code/python2.6/site-packages/haystack/backends/__init__.py" in wrapper
34. return func(obj, query_string, *args, **kwargs)
File "/path_to_code/python2.6/site-packages/haystack/backends/whoosh_backend.py" in search
424. **search_kwargs
File "/path_to_code/python2.6/site-packages/whoosh/searching.py" in search_page
640. results = self.search(query, limit=pagenum * pagelen, **kwargs)
File "/path_to_code/python2.6/site-packages/whoosh/searching.py" in search
787. self.search_with_collector(q, c)
File "/path_to_code/python2.6/site-packages/whoosh/searching.py" in search_with_collector
820. collector.run()
File "/path_to_code/python2.6/site-packages/whoosh/collectors.py" in run
143. self.set_subsearcher(subsearcher, offset)
File "/path_to_code/python2.6/site-packages/whoosh/collectors.py" in set_subsearcher
618. self.child.set_subsearcher(subsearcher, offset)
File "/path_to_code/python2.6/site-packages/whoosh/collectors.py" in set_subsearcher
171. self.matcher = self.q.matcher(subsearcher, self.context)
File "/path_to_code/python2.6/site-packages/whoosh/query/positional.py" in matcher
241. m = q.matcher(searcher, context)
File "/path_to_code/python2.6/site-packages/whoosh/query/spans.py" in matcher
561. mindist=self.mindist)
File "/path_to_code/python2.6/site-packages/whoosh/query/spans.py" in __init__
570. super(SpanNear2.SpanNear2Matcher, self).__init__(isect)
File "/path_to_code/python2.6/site-packages/whoosh/query/spans.py" in __init__
196. self._find_next()
File "/path_to_code/python2.6/site-packages/whoosh/query/spans.py" in _find_next
213. spans = self._get_spans()
File "/path_to_code/python2.6/site-packages/whoosh/query/spans.py" in _get_spans
588. aspans = ms[0].spans()
File "/path_to_code/python2.6/site-packages/whoosh/matching/mcore.py" in spans
610. % self.term())
Exception Type: TypeError at /personnel/search
Exception Value: not all arguments converted during string formatting
It's hard to tell which codebase is at fault here. It could be Whoosh's fault, or Django-haystack may be using Whoosh improperly. I'm going to post this to both projects' issue queues, to hopefully find a solution quickly.
EdgeNgramField does not support position information.
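This field is mapped to whoosh.fields.NGRAMWORDS, which is initialized with self.format=formats.Frequency(field_boost=field_boost). The Frequency format stores only term frequencies, not term positions, so a phrase query (a quoted multi-word string) against this field cannot be matched and Whoosh raises the QueryError.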
Looks like this is indeed an issue with haystack; see here
@chhantyal Looks like it's an issue with the Whoosh backend. If you'd like to submit a pull-request which changes the quoting behaviour for EdgeNgramField, I can review it.
I'm still seeing this error with django-haystack 3.1.1 and Whoosh 2.7.4.