hello-ltr
hello-ltr copied to clipboard
TermStatQuery in OpenSearch notebook not working
This is more likely a bug in the OpenSearch plugin than in this repository, so I'm posting it here mainly for visibility.
In step two of the notebook notebooks/opensearch/tmdb/term-stat-query.ipynb, the feature tsq_expr_title_tfidf cannot be logged and an exception is thrown:
---------------------------------------------------------------------------
RequestError Traceback (most recent call last)
Input In [3], in <cell line: 6>()
6 with judgments_open('data/title_judgments.txt') as judgment_list:
7 for qid, query_judgments in groupby(judgment_list, key=lambda j: j.qid):
----> 8 ftr_logger.log_for_qid(judgments=query_judgments,
9 qid=qid,
10 keywords=judgment_list.keywords(qid))
12 df = judgments_to_dataframe(ftr_logger.logged)
13 df
File ~/IdeaProjects/OpenSearchWork/hello-ltr/ltr/log.py:56, in FeatureLogger.log_for_qid(self, qid, judgments, keywords)
48 keywords = re.sub('([^\s\w]|_)+', '', keywords)
50 params = {
51 "keywords": keywords,
52 "fuzzy_keywords": ' '.join([x + '~' for x in keywords.split(' ')]),
53 "keywordsList": [keywords] # Needed by TSQ for the time being
54 }
---> 56 res = self.client.log_query(self.index, self.feature_set, ids, params)
58 # Add feature back to each judgment
59 for doc in res:
File ~/IdeaProjects/OpenSearchWork/hello-ltr/ltr/client/opensearch_client.py:145, in OpenSearchClient.log_query(self, index, featureset, ids, params)
142 if ids is not None:
143 params["query"]["bool"]["must"] = terms_query
--> 145 resp = self.es.search(index=index, body=params)
146 # resp_msg(msg="Searching {} - {}".format(index, str(terms_query)[:20]), resp=SearchResp(resp))
148 matches = []
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/opensearchpy/client/utils.py:177, in query_params.<locals>._wrapper.<locals>._wrapped(*args, **kwargs)
175 if p in kwargs:
176 params[p] = kwargs.pop(p)
--> 177 return func(*args, params=params, headers=headers, **kwargs)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/opensearchpy/client/__init__.py:1544, in OpenSearch.search(self, body, index, params, headers)
1541 if "from_" in params:
1542 params["from"] = params.pop("from_")
-> 1544 return self.transport.perform_request(
1545 "POST",
1546 _make_path(index, "_search"),
1547 params=params,
1548 headers=headers,
1549 body=body,
1550 )
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/opensearchpy/transport.py:407, in Transport.perform_request(self, method, url, headers, params, body)
405 raise e
406 else:
--> 407 raise e
409 else:
410 # connection didn't fail, confirm it's live status
411 self.connection_pool.mark_live(connection)
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/opensearchpy/transport.py:368, in Transport.perform_request(self, method, url, headers, params, body)
365 connection = self.get_connection()
367 try:
--> 368 status, headers_response, data = connection.perform_request(
369 method,
370 url,
371 params,
372 body,
373 headers=headers,
374 ignore=ignore,
375 timeout=timeout,
376 )
378 # Lowercase all the header names for consistency in accessing them.
379 headers_response = {
380 header.lower(): value for header, value in headers_response.items()
381 }
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/opensearchpy/connection/http_urllib3.py:275, in Urllib3HttpConnection.perform_request(self, method, url, params, body, timeout, ignore, headers)
271 if not (200 <= response.status < 300) and response.status not in ignore:
272 self.log_request_fail(
273 method, full_url, url, orig_body, duration, response.status, raw_data
274 )
--> 275 self._raise_error(
276 response.status,
277 raw_data,
278 self.get_response_headers(response).get("content-type"),
279 )
281 self.log_request_success(
282 method, full_url, url, orig_body, response.status, raw_data, duration
283 )
285 return response.status, response.getheaders(), raw_data
File /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/opensearchpy/connection/base.py:300, in Connection._raise_error(self, status_code, raw_data, content_type)
297 except (ValueError, TypeError) as err:
298 logger.warning("Undecodable raw error response from server: %s", err)
--> 300 raise HTTP_EXCEPTIONS.get(status_code, TransportError)(
301 status_code, error_message, additional_info
302 )
RequestError: RequestError(400, 'search_phase_execution_exception', 'Cannot create query while parsing feature [tsq_expr_title_tfidf]')
Removing the feature and re-running the notebook avoids the error. The feature works in the Elasticsearch (ES) version of the notebook, so this is likely a bug in the OpenSearch (OS) plugin.