UnicodeEncodeError in the third get_started tutorial
[Y] I have checked the documentation and related resources and couldn't resolve my bug.
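Describe the bug: Calling `dataset.upload()` in the third get_started tutorial raises a `UnicodeEncodeError`. The JSON request body is passed to `requests.post` as a `str`, so `http.client` tries to encode it as Latin-1 and fails on the character '\u2019' (right single quotation mark) contained in the test set.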
Ragas version: 0.2.14 Python version: 3.12
Code to Reproduce Share code to reproduce the issue
dataset.upload()
Error trace
UnicodeEncodeError Traceback (most recent call last) Cell In[5], line 1 ----> 1 dataset.upload()
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/ragas/testset/synthesizers/testset_schema.py:141, in Testset.upload(self, verbose) 139 def upload(self, verbose: bool = True) -> str: 140 packet = TestsetPacket(samples_original=self.samples, run_id=self.run_id) --> 141 response = upload_packet( 142 path="/alignment/testset", 143 data_json_string=packet.model_dump_json(), 144 ) 145 app_url = get_app_url() 147 testset_endpoint = f"{app_url}/dashboard/alignment/testset/{self.run_id}"
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/ragas/sdk.py:102, in upload_packet(path, data_json_string) 99 print(f" {data_json_string}") 100 print(section_delimiter) --> 102 response = requests.post( 103 f"{base_url}/api/v1{path}", 104 data=data_json_string, 105 headers=headers, 106 timeout=(connection_timeout, read_timeout), 107 ) 109 if enable_http_log: 110 try:
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/api.py:115, in post(url, data, json, **kwargs)
103 def post(url, data=None, json=None, **kwargs):
104 r"""Sends a POST request.
105
106 :param url: URL for the new :class:Request object.
(...) 112 :rtype: requests.Response
113 """
--> 115 return request("post", url, data=data, json=json, **kwargs)
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/api.py:59, in request(method, url, **kwargs) 55 # By using the 'with' statement we are sure the session is closed, thus we 56 # avoid leaving sockets open which can trigger a ResourceWarning in some 57 # cases, and look like a memory leak in others. 58 with sessions.Session() as session: ---> 59 return session.request(method=method, url=url, **kwargs)
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json) 584 send_kwargs = { 585 "timeout": timeout, 586 "allow_redirects": allow_redirects, 587 } 588 send_kwargs.update(settings) --> 589 resp = self.send(prep, **send_kwargs) 591 return resp
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs) 700 start = preferred_clock() 702 # Send the request --> 703 r = adapter.send(request, **kwargs) 705 # Total elapsed time of the request (approximately) 706 elapsed = preferred_clock() - start
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies) 664 timeout = TimeoutSauce(connect=timeout, read=timeout) 666 try: --> 667 resp = conn.urlopen( 668 method=request.method, 669 url=url, 670 body=request.body, 671 headers=request.headers, 672 redirect=False, 673 assert_same_host=False, 674 preload_content=False, 675 decode_content=False, 676 retries=self.max_retries, 677 timeout=timeout, 678 chunked=chunked, 679 ) 681 except (ProtocolError, OSError) as err: 682 raise ConnectionError(err, request=request)
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/urllib3/connectionpool.py:715, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
712 self._prepare_proxy(conn)
714 # Make the request on the httplib connection object.
--> 715 httplib_response = self._make_request(
716 conn,
717 method,
718 url,
719 timeout=timeout_obj,
720 body=body,
721 headers=headers,
722 chunked=chunked,
723 )
725 # If we're going to release the connection in finally:, then
726 # the response doesn't need to know about the connection. Otherwise
727 # it will also try to release it and we'll have a double-release
728 # mess.
729 response_conn = conn if not release_conn else None
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/urllib3/connectionpool.py:416, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw) 414 conn.request_chunked(method, url, **httplib_request_kw) 415 else: --> 416 conn.request(method, url, **httplib_request_kw) 418 # We are swallowing BrokenPipeError (errno.EPIPE) since the server is 419 # legitimately able to close the connection after sending a valid response. 420 # With this behaviour, the received response is still readable. 421 except BrokenPipeError: 422 # Python 3
File ~/miniforge3/envs/lc/lib/python3.12/site-packages/urllib3/connection.py:244, in HTTPConnection.request(self, method, url, body, headers) 242 if "user-agent" not in (six.ensure_str(k.lower()) for k in headers): 243 headers["User-Agent"] = _get_default_user_agent() --> 244 super(HTTPConnection, self).request(method, url, body=body, headers=headers)
File ~/miniforge3/envs/lc/lib/python3.12/http/client.py:1338, in HTTPConnection.request(self, method, url, body, headers, encode_chunked) 1335 def request(self, method, url, body=None, headers={}, *, 1336 encode_chunked=False): 1337 """Send a complete request to the server.""" -> 1338 self._send_request(method, url, body, headers, encode_chunked)
File ~/miniforge3/envs/lc/lib/python3.12/http/client.py:1383, in HTTPConnection._send_request(self, method, url, body, headers, encode_chunked) 1379 self.putheader(hdr, value) 1380 if isinstance(body, str): 1381 # RFC 2616 Section 3.7.1 says that text default has a 1382 # default charset of iso-8859-1. -> 1383 body = _encode(body, 'body') 1384 self.endheaders(body, encode_chunked=encode_chunked)
File ~/miniforge3/envs/lc/lib/python3.12/http/client.py:166, in _encode(data, name) 164 return data.encode("latin-1") 165 except UnicodeEncodeError as err: --> 166 raise UnicodeEncodeError( 167 err.encoding, 168 err.object, 169 err.start, 170 err.end, 171 "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') " 172 "if you want to send it encoded in UTF-8." % 173 (name.title(), data[err.start:err.end], name)) from None
UnicodeEncodeError: 'latin-1' codec can't encode character '\u2019' in position 2654: Body ('’') is not valid Latin-1. Use body.encode('utf-8') if you want to send it encoded in UTF-8.
Expected behavior A clear and concise description of what you expected to happen.
The example dataset should upload without errors, even when it contains non-Latin-1 characters. At a minimum, the examples in the get_started docs should run reliably.
Additional context Add any other context about the problem here.