ParamTools
ParamTools copied to clipboard
`fsspec` 0.9.0 doesn't read entire JSON file when using http protocol
Note that using the github protocol (i.e. reading the file from the github api) works just fine:
>>> tc.Calculator.read_json_param_objects(None, 'github://PSLmodels:Tax-Calculator@master/taxcalc/assumptions/economic_assumptions_template.json')
{'policy': {}, 'consumption': {'MPC_e17500': {2017: 0.0}, 'MPC_e18400': {2017: 0.0}, 'MPC_e19800': {2017: 0.0}, 'MPC_e20400': {2017: 0.0}, 'BEN_housing_value': {2017: 1.0}, 'BEN_snap_value': {2017: 1.0}, 'BEN_tanf_value': {2017: 1.0}, 'BEN_vet_value': {2017: 1.0}, 'BEN_wic_value': {2017: 1.0}, 'BEN_mcare_value': {2017: 1.0}, 'BEN_mcaid_value': {2017: 1.0}, 'BEN_other_value': {2017: 1.0}}, 'growdiff_baseline': {'ABOOK': {2017: 0.0}, 'ACGNS': {2017: 0.0}, 'ACPIM': {2017: 0.0}, 'ACPIU': {2017: 0.0}, 'ADIVS': {2017: 0.0}, 'AINTS': {2017: 0.0}, 'AIPD': {2017: 0.0}, 'ASCHCI': {2017: 0.0}, 'ASCHCL': {2017: 0.0}, 'ASCHEI': {2017: 0.0}, 'ASCHEL': {2017: 0.0}, 'ASCHF': {2017: 0.0}, 'ASOCSEC': {2017: 0.0}, 'ATXPY': {2017: 0.0}, 'AUCOMP': {2017: 0.0}, 'AWAGE': {2017: 0.0}, 'ABENOTHER': {2017: 0.0}, 'ABENMCARE': {2017: 0.0}, 'ABENMCAID': {2017: 0.0}, 'ABENSSI': {2017: 0.0}, 'ABENSNAP': {2017: 0.0}, 'ABENWIC': {2017: 0.0}, 'ABENHOUSING': {2017: 0.0}, 'ABENTANF': {2017: 0.0}, 'ABENVET': {2017: 0.0}}, 'growdiff_response': {'ABOOK': {2017: 0.0}, 'ACGNS': {2017: 0.0}, 'ACPIM': {2017: 0.0}, 'ACPIU': {2017: 0.0}, 'ADIVS': {2017: 0.0}, 'AINTS': {2017: 0.0}, 'AIPD': {2017: 0.0}, 'ASCHCI': {2017: 0.0}, 'ASCHCL': {2017: 0.0}, 'ASCHEI': {2017: 0.0}, 'ASCHEL': {2017: 0.0}, 'ASCHF': {2017: 0.0}, 'ASOCSEC': {2017: 0.0}, 'ATXPY': {2017: 0.0}, 'AUCOMP': {2017: 0.0}, 'AWAGE': {2017: 0.0}, 'ABENOTHER': {2017: 0.0}, 'ABENMCARE': {2017: 0.0}, 'ABENMCAID': {2017: 0.0}, 'ABENSSI': {2017: 0.0}, 'ABENSNAP': {2017: 0.0}, 'ABENWIC': {2017: 0.0}, 'ABENHOUSING': {2017: 0.0}, 'ABENTANF': {2017: 0.0}, 'ABENVET': {2017: 0.0}}}
Thanks for the report @chusloj.
Related:
- https://github.com/pangeo-data/pangeo-datastore/issues/124#issuecomment-814122118
- https://github.com/PSLmodels/Tax-Calculator/pull/2584#issuecomment-814365830
Not sure if this is related, but I'm having trouble reading JSON files from GitHub URLs.
When using ParamTools on my local machine and trying to read a raw JSON file from a GitHub URL, I get an error:
reform_filename = 'https://raw.githubusercontent.com/PSLmodels/Tax-Calculator/master/docs/recipes/_static/reformA.json'
params = tc.Calculator.read_json_param_objects(reform_filename, None)
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
~/anaconda3/envs/taxcalc-dev/lib/python3.8/site-packages/paramtools/utils.py in read_json(params_or_path, storage_options)
108 res = remove_comments(res)
--> 109 return json.loads(res, object_pairs_hook=OrderedDict)
110 except json.JSONDecodeError as je:
~/anaconda3/envs/taxcalc-dev/lib/python3.8/json/__init__.py in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
369 kw['parse_constant'] = parse_constant
--> 370 return cls(**kw).decode(s)
~/anaconda3/envs/taxcalc-dev/lib/python3.8/json/decoder.py in decode(self, s, _w)
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
~/anaconda3/envs/taxcalc-dev/lib/python3.8/json/decoder.py in raw_decode(self, s, idx)
352 try:
--> 353 obj, end = self.scan_once(s, idx)
354 except StopIteration as err:
JSONDecodeError: Expecting property name enclosed in double quotes: line 9 column 1 (char 80)
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-8-a783a22ba0d1> in <module>
1 reform_filename = 'https://raw.githubusercontent.com/PSLmodels/Tax-Calculator/master/docs/recipes/_static/reformA.json'
----> 2 params = tc.Calculator.read_json_param_objects(reform_filename, None)
~/repos/tax-calculator/taxcalc/calculator.py in read_json_param_objects(reform, assump)
1100 # construct the composite dictionary
1101 param_dict = dict()
-> 1102 param_dict['policy'] = Policy.read_json_reform(reform)
1103 param_dict['consumption'] = Consumption.read_json_update(assump)
1104 for topkey in ['growdiff_baseline', 'growdiff_response']:
~/repos/tax-calculator/taxcalc/policy.py in read_json_reform(obj)
110 pointing to a valid JSON file hosted online, or a valid JSON text.
111 """
--> 112 return Parameters._read_json_revision(obj, 'policy')
113
114 def implement_reform(self, reform,
~/repos/tax-calculator/taxcalc/parameters.py in _read_json_revision(obj, topkey)
747 return dict()
748
--> 749 full_dict = pt.read_json(obj)
750
751 # check top-level key contents of dictionary
~/anaconda3/envs/taxcalc-dev/lib/python3.8/site-packages/paramtools/utils.py in read_json(params_or_path, storage_options)
111 if len(res) > 100:
112 res = res[:100] + "..." + res[-10:]
--> 113 raise ValueError(f"Unable to decode JSON string: {res}") from je
114
115 if isinstance(res, dict):
ValueError: Unable to decode JSON string: {
"policy": {
"II_em": {"2020": 1000},
And when trying to read from a non-raw GitHub file URL:
reform_filename = 'https://github.com/PSLmodels/Tax-Calculator/blob/master/docs/recipes/_static/reformA.json'
params = tc.Calculator.read_json_param_objects(reform_filename, None)
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
~/anaconda3/envs/taxcalc-dev/lib/python3.8/site-packages/paramtools/utils.py in read_json(params_or_path, storage_options)
108 res = remove_comments(res)
--> 109 return json.loads(res, object_pairs_hook=OrderedDict)
110 except json.JSONDecodeError as je:
~/anaconda3/envs/taxcalc-dev/lib/python3.8/json/__init__.py in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
369 kw['parse_constant'] = parse_constant
--> 370 return cls(**kw).decode(s)
~/anaconda3/envs/taxcalc-dev/lib/python3.8/json/decoder.py in decode(self, s, _w)
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
~/anaconda3/envs/taxcalc-dev/lib/python3.8/json/decoder.py in raw_decode(self, s, idx)
354 except StopIteration as err:
--> 355 raise JSONDecodeError("Expecting value", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 7 column 1 (char 6)
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-9-17150c37eb39> in <module>
1 reform_filename = 'https://github.com/PSLmodels/Tax-Calculator/blob/master/docs/recipes/_static/reformA.json'
----> 2 params = tc.Calculator.read_json_param_objects(reform_filename, None)
~/repos/tax-calculator/taxcalc/calculator.py in read_json_param_objects(reform, assump)
1100 # construct the composite dictionary
1101 param_dict = dict()
-> 1102 param_dict['policy'] = Policy.read_json_reform(reform)
1103 param_dict['consumption'] = Consumption.read_json_update(assump)
1104 for topkey in ['growdiff_baseline', 'growdiff_response']:
~/repos/tax-calculator/taxcalc/policy.py in read_json_reform(obj)
110 pointing to a valid JSON file hosted online, or a valid JSON text.
111 """
--> 112 return Parameters._read_json_revision(obj, 'policy')
113
114 def implement_reform(self, reform,
~/repos/tax-calculator/taxcalc/parameters.py in _read_json_revision(obj, topkey)
747 return dict()
748
--> 749 full_dict = pt.read_json(obj)
750
751 # check top-level key contents of dictionary
~/anaconda3/envs/taxcalc-dev/lib/python3.8/site-packages/paramtools/utils.py in read_json(params_or_path, storage_options)
111 if len(res) > 100:
112 res = res[:100] + "..." + res[-10:]
--> 113 raise ValueError(f"Unable to decode JSON string: {res}") from je
114
115 if isinstance(res, dict):
ValueError: Unable to decode JSON string:
<!DOCTYPE html>
<html lang="en" data-color-mode="auto" data-light-theme="light" data-dark-them...
</html>
I have ParamTools 0.18.1 installed:
(taxcalc-dev) jason.debacker@JDEBACKER ~ % conda list paramtools
# packages in environment at /Users/jason.debacker/anaconda3/envs/taxcalc-dev:
#
# Name Version Build Channel
paramtools 0.18.1 pyhd8ed1ab_0 conda-forge
(taxcalc-dev) jason.debacker@JDEBACKER ~ %
When running the same notebook on Google Colab, the first approach works, but the second does not. I am not sure what ParamTools version is being installed with taxcalc there...
@jdebacker this is related. You can use this kind of URL:
In [1]: import paramtools
In [2]: paramtools.read_json("github://PSLmodels:Tax-Calculator@master/docs/recipes/_static/reformA.json")
Out[2]:
OrderedDict([('policy',
OrderedDict([('II_em', OrderedDict([('2020', 1000)])),
('II_rt5', OrderedDict([('2020', 0.36)])),
('II_rt6', OrderedDict([('2020', 0.39)])),
('II_rt7', OrderedDict([('2020', 0.41)])),
('PT_rt5', OrderedDict([('2020', 0.36)])),
('PT_rt6', OrderedDict([('2020', 0.39)])),
('PT_rt7', OrderedDict([('2020', 0.41)]))]))])
In [3]:
Or, you can pin to `fsspec<0.9`.
I'll try to figure out how to resolve the bug with reading HTTP URLs this week.