rows
rows copied to clipboard
Importing a parquet file results in ValueError: Can't decode base64
Hello
I am getting "ValueError: Can't decode base64"
when I try to load a parquet file with
table = rows.import_from_parquet('test.parquet')
for row in table:
    print(row)
The parquet file was exported from a CSV file with pandas, via df.to_parquet('test.parquet').
Thanks for the help.
Here is the full error log:
---------------------------------------------------------------------------
UnicodeEncodeError Traceback (most recent call last)
/anaconda3/envs/learnco_projects/lib/python3.6/base64.py in _bytes_from_decode_data(s)
36 try:
---> 37 return s.encode('ascii')
38 except UnicodeEncodeError:
UnicodeEncodeError: 'ascii' codec can't encode character '\u2019' in position 165: ordinal not in range(128)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
/anaconda3/envs/learnco_projects/lib/python3.6/site-packages/rows/fields.py in deserialize(cls, value, *args, **kwargs)
131 try:
--> 132 return b64decode(value)
133 except (TypeError, ValueError, binascii.Error):
/anaconda3/envs/learnco_projects/lib/python3.6/base64.py in b64decode(s, altchars, validate)
79 """
---> 80 s = _bytes_from_decode_data(s)
81 if altchars is not None:
/anaconda3/envs/learnco_projects/lib/python3.6/base64.py in _bytes_from_decode_data(s)
38 except UnicodeEncodeError:
---> 39 raise ValueError('string argument should contain only ASCII characters')
40 if isinstance(s, bytes_types):
ValueError: string argument should contain only ASCII characters
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-2-2e711326eb6a> in <module>
----> 1 table = rows.import_from_parquet('test.parquet')
2 for row in table:
3 print(row) # access fields values with `rows.field_name`
/anaconda3/envs/learnco_projects/lib/python3.6/site-packages/rows/plugins/plugin_parquet.py in import_from_parquet(filename_or_fobj, *args, **kwargs)
61 meta = {"imported_from": "parquet", "filename": filename}
62 return create_table(
---> 63 [header] + table_rows, meta=meta, force_types=types, *args, **kwargs
64 )
/anaconda3/envs/learnco_projects/lib/python3.6/site-packages/rows/plugins/utils.py in create_table(data, meta, fields, skip_header, import_fields, samples, force_types, *args, **kwargs)
168 get_row = get_items(*map(header.index, import_fields))
169 table = Table(fields=fields, meta=meta)
--> 170 table.extend(dict(zip(import_fields, get_row(row))) for row in table_rows)
171
172 return table
/anaconda3/envs/learnco_projects/lib/python3.6/_collections_abc.py in extend(self, values)
988 'S.extend(iterable) -- extend sequence by appending elements from the iterable'
989 for v in values:
--> 990 self.append(v)
991
992 def pop(self, index=-1):
/anaconda3/envs/learnco_projects/lib/python3.6/site-packages/rows/table.py in append(self, row)
92 """Add a row to the table. Should be a dict"""
93
---> 94 self._rows.append(self._make_row(row))
95
96 def __len__(self):
/anaconda3/envs/learnco_projects/lib/python3.6/site-packages/rows/table.py in _make_row(self, row)
86 return [
87 field_type.deserialize(row.get(field_name, None))
---> 88 for field_name, field_type in self.fields.items()
89 ]
90
/anaconda3/envs/learnco_projects/lib/python3.6/site-packages/rows/table.py in <listcomp>(.0)
86 return [
87 field_type.deserialize(row.get(field_name, None))
---> 88 for field_name, field_type in self.fields.items()
89 ]
90
/anaconda3/envs/learnco_projects/lib/python3.6/site-packages/rows/fields.py in deserialize(cls, value, *args, **kwargs)
132 return b64decode(value)
133 except (TypeError, ValueError, binascii.Error):
--> 134 raise ValueError("Can't decode base64")
135 else:
136 value_error(value, cls)
ValueError: Can't decode base64
What's the rows version you're using? Could you please provide the file, so I can reproduce here?
Hello I am using version '0.4.1'. test.parquet.zip
Here is the code snippet I am using to create the file I test rows with:
import pandas as pd
in_file = '../news_category/data/news_cleaned_2018_02_13.csv.zip'
df = pd.read_csv(in_file, nrows=100, compression='zip')
df.to_parquet('test.parquet')
The zipped csv file comes from here.
Thanks for the help.
Just in case, here is also the csv file. test.csv.zip