vcrpy
vcrpy copied to clipboard
HTTPX Responses are not supported for JSON serializer
Hi! First of all, I wanted to thank you for this package and also for the new httpx support!
I think that we should support a JSON serializer for recorded responses. Here is some code with YAML first:
import unittest
import httpx
from vcr import VCR
from vcr.record_mode import RecordMode
def scrub_malware_bazaar_sample_response(response: dict):
response.update({"content": httpx.Response(status_code=200)})
return response
vcr = VCR(
serializer="yaml",
record_mode=RecordMode.ONCE,
path_transformer=VCR.ensure_suffix(".yaml"),
before_record_response=scrub_malware_bazaar_sample_response,
cassette_library_dir="fixtures/cassettes",
)
class TestPuller(unittest.IsolatedAsyncioTestCase):
@vcr.use_cassette(
cassette_library_dir=vcr.cassette_library_dir + "/malware_bazaar_samples",
filter_headers=["API-KEY"],
)
def test_puller_malware_bazaar_sample_when_request_succeed(self):
response = httpx.post(
url="https://mb-api.abuse.ch/api/v1/",
headers={"API-KEY": "Some Key"},
data={"query": "get_recent", "selector": "time"},
)
print(response)
When running the test above, everything works fine, and we do succeed in updating the "content" key
with an httpx object. Here is how it looks:
However, when changing the serializer to JSON, we get an exception with the following traceback:
cassette_dict = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
def serialize(cassette_dict):
error_message = (
"Does this HTTP interaction contain binary data? "
"If so, use a different serializer (like the yaml serializer) "
"for this request?"
)
try:
> return json.dumps(cassette_dict, indent=4) + "\n"
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/serializers/jsonserializer.py:19:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
obj = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
skipkeys = False, ensure_ascii = True, check_circular = True, allow_nan = True
cls = <class 'json.encoder.JSONEncoder'>, indent = 4, separators = None
default = None
def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
allow_nan=True, cls=None, indent=None, separators=None,
default=None, sort_keys=False, **kw):
"""Serialize ``obj`` to a JSON formatted ``str``.
If ``skipkeys`` is true then ``dict`` keys that are not basic types
(``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped
instead of raising a ``TypeError``.
If ``ensure_ascii`` is false, then the return value can contain non-ASCII
characters if they appear in strings contained in ``obj``. Otherwise, all
such characters are escaped in JSON strings.
If ``check_circular`` is false, then the circular reference check
for container types will be skipped and a circular reference will
result in an ``RecursionError`` (or worse).
If ``allow_nan`` is false, then it will be a ``ValueError`` to
serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
strict compliance of the JSON specification, instead of using the
JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
If ``indent`` is a non-negative integer, then JSON array elements and
object members will be pretty-printed with that indent level. An indent
level of 0 will only insert newlines. ``None`` is the most compact
representation.
If specified, ``separators`` should be an ``(item_separator, key_separator)``
tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and
``(',', ': ')`` otherwise. To get the most compact JSON representation,
you should specify ``(',', ':')`` to eliminate whitespace.
``default(obj)`` is a function that should return a serializable version
of obj or raise TypeError. The default simply raises TypeError.
If *sort_keys* is true (default: ``False``), then the output of
dictionaries will be sorted by key.
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
``.default()`` method to serialize additional types), specify it with
the ``cls`` kwarg; otherwise ``JSONEncoder`` is used.
"""
# cached encoder
if (not skipkeys and ensure_ascii and
check_circular and allow_nan and
cls is None and indent is None and separators is None and
default is None and not sort_keys and not kw):
return _default_encoder.encode(obj)
if cls is None:
cls = JSONEncoder
> return cls(
skipkeys=skipkeys, ensure_ascii=ensure_ascii,
check_circular=check_circular, allow_nan=allow_nan, indent=indent,
separators=separators, default=default, sort_keys=sort_keys,
**kw).encode(obj)
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/__init__.py:234:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <json.encoder.JSONEncoder object at 0x11034b940>
o = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
def encode(self, o):
"""Return a JSON string representation of a Python data structure.
>>> from json.encoder import JSONEncoder
>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
'{"foo": ["bar", "baz"]}'
"""
# This is for extremely simple cases and benchmarks.
if isinstance(o, str):
if self.ensure_ascii:
return encode_basestring_ascii(o)
else:
return encode_basestring(o)
# This doesn't pass the iterator directly to ''.join() because the
# exceptions aren't as detailed. The list call should be roughly
# equivalent to the PySequence_Fast that ''.join() would do.
chunks = self.iterencode(o, _one_shot=True)
if not isinstance(chunks, (list, tuple)):
> chunks = list(chunks)
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:201:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
o = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
_current_indent_level = 0
def _iterencode(o, _current_indent_level):
if isinstance(o, str):
yield _encoder(o)
elif o is None:
yield 'null'
elif o is True:
yield 'true'
elif o is False:
yield 'false'
elif isinstance(o, int):
# see comment for int/float in _make_iterencode
yield _intstr(o)
elif isinstance(o, float):
# see comment for int/float in _make_iterencode
yield _floatstr(o)
elif isinstance(o, (list, tuple)):
yield from _iterencode_list(o, _current_indent_level)
elif isinstance(o, dict):
> yield from _iterencode_dict(o, _current_indent_level)
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:431:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dct = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
_current_indent_level = 1
def _iterencode_dict(dct, _current_indent_level):
if not dct:
yield '{}'
return
if markers is not None:
markerid = id(dct)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = dct
yield '{'
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + _indent * _current_indent_level
item_separator = _item_separator + newline_indent
yield newline_indent
else:
newline_indent = None
item_separator = _item_separator
first = True
if _sort_keys:
items = sorted(dct.items())
else:
items = dct.items()
for key, value in items:
if isinstance(key, str):
pass
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
elif isinstance(key, float):
# see comment for int/float in _make_iterencode
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is False:
key = 'false'
elif key is None:
key = 'null'
elif isinstance(key, int):
# see comment for int/float in _make_iterencode
key = _intstr(key)
elif _skipkeys:
continue
else:
raise TypeError(f'keys must be str, int, float, bool or None, '
f'not {key.__class__.__name__}')
if first:
first = False
else:
yield item_separator
yield _encoder(key)
yield _key_separator
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, int):
# see comment for int/float in _make_iterencode
yield _intstr(value)
elif isinstance(value, float):
# see comment for int/float in _make_iterencode
yield _floatstr(value)
else:
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
> yield from chunks
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:405:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
lst = [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encoding': ['gzip, defl...lf'; object-src 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}]
_current_indent_level = 2
def _iterencode_list(lst, _current_indent_level):
if not lst:
yield '[]'
return
if markers is not None:
markerid = id(lst)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = lst
buf = '['
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + _indent * _current_indent_level
separator = _item_separator + newline_indent
buf += newline_indent
else:
newline_indent = None
separator = _item_separator
first = True
for value in lst:
if first:
first = False
else:
buf = separator
if isinstance(value, str):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
elif value is True:
yield buf + 'true'
elif value is False:
yield buf + 'false'
elif isinstance(value, int):
# Subclasses of int/float may override __repr__, but we still
# want to encode them as integers/floats in JSON. One example
# within the standard library is IntEnum.
yield buf + _intstr(value)
elif isinstance(value, float):
# see comment above for int
yield buf + _floatstr(value)
else:
yield buf
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
> yield from chunks
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:325:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dct = {'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encoding': ['gzip, defla...elf'; object-src 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}
_current_indent_level = 3
def _iterencode_dict(dct, _current_indent_level):
if not dct:
yield '{}'
return
if markers is not None:
markerid = id(dct)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = dct
yield '{'
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + _indent * _current_indent_level
item_separator = _item_separator + newline_indent
yield newline_indent
else:
newline_indent = None
item_separator = _item_separator
first = True
if _sort_keys:
items = sorted(dct.items())
else:
items = dct.items()
for key, value in items:
if isinstance(key, str):
pass
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
elif isinstance(key, float):
# see comment for int/float in _make_iterencode
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is False:
key = 'false'
elif key is None:
key = 'null'
elif isinstance(key, int):
# see comment for int/float in _make_iterencode
key = _intstr(key)
elif _skipkeys:
continue
else:
raise TypeError(f'keys must be str, int, float, bool or None, '
f'not {key.__class__.__name__}')
if first:
first = False
else:
yield item_separator
yield _encoder(key)
yield _key_separator
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, int):
# see comment for int/float in _make_iterencode
yield _intstr(value)
elif isinstance(value, float):
# see comment for int/float in _make_iterencode
yield _floatstr(value)
else:
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
> yield from chunks
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:405:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dct = {'content': <Response [200 OK]>, 'headers': {'Cache-Control': ['max-age=2592000'], 'Connection': ['Keep-Alive'], 'Cont...self'; object-src 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}
_current_indent_level = 4
def _iterencode_dict(dct, _current_indent_level):
if not dct:
yield '{}'
return
if markers is not None:
markerid = id(dct)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = dct
yield '{'
if _indent is not None:
_current_indent_level += 1
newline_indent = '\n' + _indent * _current_indent_level
item_separator = _item_separator + newline_indent
yield newline_indent
else:
newline_indent = None
item_separator = _item_separator
first = True
if _sort_keys:
items = sorted(dct.items())
else:
items = dct.items()
for key, value in items:
if isinstance(key, str):
pass
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
elif isinstance(key, float):
# see comment for int/float in _make_iterencode
key = _floatstr(key)
elif key is True:
key = 'true'
elif key is False:
key = 'false'
elif key is None:
key = 'null'
elif isinstance(key, int):
# see comment for int/float in _make_iterencode
key = _intstr(key)
elif _skipkeys:
continue
else:
raise TypeError(f'keys must be str, int, float, bool or None, '
f'not {key.__class__.__name__}')
if first:
first = False
else:
yield item_separator
yield _encoder(key)
yield _key_separator
if isinstance(value, str):
yield _encoder(value)
elif value is None:
yield 'null'
elif value is True:
yield 'true'
elif value is False:
yield 'false'
elif isinstance(value, int):
# see comment for int/float in _make_iterencode
yield _intstr(value)
elif isinstance(value, float):
# see comment for int/float in _make_iterencode
yield _floatstr(value)
else:
if isinstance(value, (list, tuple)):
chunks = _iterencode_list(value, _current_indent_level)
elif isinstance(value, dict):
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
> yield from chunks
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:405:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
o = <Response [200 OK]>, _current_indent_level = 4
def _iterencode(o, _current_indent_level):
if isinstance(o, str):
yield _encoder(o)
elif o is None:
yield 'null'
elif o is True:
yield 'true'
elif o is False:
yield 'false'
elif isinstance(o, int):
# see comment for int/float in _make_iterencode
yield _intstr(o)
elif isinstance(o, float):
# see comment for int/float in _make_iterencode
yield _floatstr(o)
elif isinstance(o, (list, tuple)):
yield from _iterencode_list(o, _current_indent_level)
elif isinstance(o, dict):
yield from _iterencode_dict(o, _current_indent_level)
else:
if markers is not None:
markerid = id(o)
if markerid in markers:
raise ValueError("Circular reference detected")
markers[markerid] = o
> o = _default(o)
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:438:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <json.encoder.JSONEncoder object at 0x11034b940>, o = <Response [200 OK]>
def default(self, o):
"""Implement this method in a subclass such that it returns
a serializable object for ``o``, or calls the base implementation
(to raise a ``TypeError``).
For example, to support arbitrary iterators, you could
implement default like this::
def default(self, o):
try:
iterable = iter(o)
except TypeError:
pass
else:
return list(iterable)
# Let the base class default method raise the TypeError
return JSONEncoder.default(self, o)
"""
> raise TypeError(f'Object of type {o.__class__.__name__} '
f'is not JSON serializable')
E TypeError: Object of type Response is not JSON serializable
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py:179: TypeError
During handling of the above exception, another exception occurred:
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/unittest/async_case.py:64: in _callTestMethod
self._callMaybeAsync(method)
/opt/homebrew/Cellar/[email protected]/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/unittest/async_case.py:83: in _callMaybeAsync
ret = func(*args, **kwargs)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:100: in __call__
return type(self)(self.cls, args_getter)._execute_function(function, args, kwargs)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:114: in _execute_function
return self._handle_function(fn=handle_function)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:138: in _handle_function
return fn(cassette)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:91: in __exit__
next(self.__finish, None)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:69: in _patch_generator
cassette._save()
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/cassette.py:331: in _save
self._persister.save_cassette(self._path, self._as_dict(), serializer=self._serializer)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/persisters/filesystem.py:20: in save_cassette
data = serialize(cassette_dict, serializer)
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/serialize.py:58: in serialize
return serializer.serialize(data)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
cassette_dict = {'interactions': [{'request': {'body': 'query=get_recent&selector=time', 'headers': {'accept': ['*/*'], 'accept-encodi... 'none'"], 'Content-Type': ['application/json'], ...}, 'http_version': 'HTTP/1.1', 'status_code': 200}}], 'version': 1}
def serialize(cassette_dict):
error_message = (
"Does this HTTP interaction contain binary data? "
"If so, use a different serializer (like the yaml serializer) "
"for this request?"
)
try:
return json.dumps(cassette_dict, indent=4) + "\n"
except UnicodeDecodeError as original: # py2
raise UnicodeDecodeError(
original.encoding,
b"Error serializing cassette to JSON",
original.start,
original.end,
original.args[-1] + error_message,
)
except TypeError: # py3
> raise TypeError(error_message)
E TypeError: Does this HTTP interaction contain binary data? If so, use a different serializer (like the yaml serializer) for this request?
../../../.local/share/virtualenvs/puller-z7wjjB6x/lib/python3.9/site-packages/vcr/serializers/jsonserializer.py:29: TypeError
I think that we should support binary data for JSON responses as well.
Thank you!
@YoniMelki, why are you replacing the response content with an httpx.Response
object? Should this function just erase the content of the original response?
def scrub_malware_bazaar_sample_response(response: dict):
response.update({"content": httpx.Response(status_code=200)})
return response
At the point of this update, the original httpx.Response
was already serialized and the "content"
was supposed to contain a UTF-8 string. Your code says you have a response whose content contains a nested response object. We would never get a real HTTP response like that.
@boechat107 So what would you suggest doing? I still want all the fields of the response.