cuspatial
cuspatial copied to clipboard
[FEA] cuspatial should natively read geopandas parquet files
Describe the solution you'd like: I'd like cuspatial to read a geopandas-based parquet file.
Describe alternatives you've considered: Following the docs at https://docs.rapids.ai/api/cuspatial/stable/api_docs/geopandas_compatibility.html, this code:
df = gpd.read_parquet(file.parquet)
gdf = cuspatial.from_geopandas(df)
gdf.head()
gives this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_1315/999814630.py in <module>
----> 1 gdf.head()
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/frame.py in head(self, n)
4269 1 1 11.0
4270 """
-> 4271 return self.iloc[:n]
4272
4273 @_cudf_nvtx_annotate
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in __getitem__(self, arg)
125 if not isinstance(arg, tuple):
126 arg = (arg, slice(None))
--> 127 return self._getitem_tuple_arg(arg)
128
129 def __setitem__(self, key, value):
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in _getitem_tuple_arg(self, arg)
394 else:
395 if isinstance(arg[0], slice):
--> 396 df = columns_df._slice(arg[0])
397 elif is_scalar(arg[0]):
398 index = arg[0]
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in _slice(self, arg)
1323 *self._columns,
1324 ]
-> 1325 result = self._from_columns_like_self(
1326 libcudf.copying.columns_slice(columns_to_slice, [start, stop])[0],
1327 self._column_names,
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in _from_columns_like_self(self, columns, column_names, index_names)
6098 columns, column_names, index_names
6099 )
-> 6100 result._set_column_names_like(self)
6101 return result
6102
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in _set_column_names_like(self, other)
2110
2111 def _set_column_names_like(self, other):
-> 2112 self._set_column_names(
2113 other._data.names, other._data.multiindex, other._data.level_names
2114 )
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in _set_column_names(self, names, multiindex, level_names)
2105 raise ValueError("Duplicate column names are not allowed")
2106
-> 2107 self._data = ColumnAccessor(
2108 data, multiindex=multiindex, level_names=level_names,
2109 )
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/column_accessor.py in __init__(self, data, multiindex, level_names)
128 v = column.as_column(v)
129 if len(v) != column_length:
--> 130 raise ValueError("All columns must be of equal length")
131 self._data[k] = v
132
ValueError: All columns must be of equal length
gdf = cuspatial.GeoDataFrame(file.parquet) gives this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_1315/2302411113.py in <module>
----> 1 gdf = cuspatial.GeoDataFrame('census-parquet/part.6.parquet')
/opt/conda/envs/rapids/lib/python3.8/site-packages/cuspatial/geometry/geodataframe.py in __init__(self, data)
42 pass
43 else:
---> 44 raise ValueError("Invalid type passed to GeoDataFrame ctor")
45
46 @property
ValueError: Invalid type passed to GeoDataFrame ctor
gdf = cudf.read_parquet(file.parquet)
reads in fine, but gdf.head()
gives an error:
---------------------------------------------------------------------------
ArrowException Traceback (most recent call last)
/opt/conda/envs/rapids/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
/opt/conda/envs/rapids/lib/python3.8/site-packages/IPython/lib/pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
/opt/conda/envs/rapids/lib/python3.8/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in __repr__(self)
1782 def __repr__(self):
1783 output = self._get_renderable_dataframe()
-> 1784 return self._clean_renderable_dataframe(output)
1785
1786 @_cudf_nvtx_annotate
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in _clean_renderable_dataframe(self, output)
1660 width = None
1661
-> 1662 output = output.to_pandas().to_string(
1663 max_rows=max_rows,
1664 min_rows=min_rows,
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in to_pandas(self, nullable, **kwargs)
4332
4333 for i, col_key in enumerate(self._data):
-> 4334 out_data[i] = self._data[col_key].to_pandas(
4335 index=out_index, nullable=nullable
4336 )
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/column/string.py in to_pandas(self, index, nullable, **kwargs)
5290 pd_series = pd.Series(pandas_array, copy=False)
5291 else:
-> 5292 pd_series = self.to_arrow().to_pandas(**kwargs)
5293
5294 if index is not None:
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._PandasConvertible.to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib.Array._to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._array_like_to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
ArrowException: Unknown error: Wrapping _�HVcc�Υ����3@!��Vcc�b�{���3@��KqUcc��v�1�3@�1ZGUcc��&k�C�3@oc�#Ucc���}8H�3@��hUcc����I�3@F���Tcc�z�W�3@��~�Tcc�M֨�h�3@� 1�Tcc��,D��3@L�uTcc��{��3@�qScc�$�&ݖ�3@`!sePcc�`�|x��3@%#gaOcc���g��3@&W��Mcc���g��3@4w�Icc���g��3@('�UHcc���g��3@�k��Fcc��M)���3@`��MEcc�1�*��3@&:�,Bcc�1�*��3@�~��@cc�1�*��3@��u?cc���Ք�3@��?�:cc�%���3@*�-9cc��@�ȓ�3@,g~5cc��@�ȓ�3@p��e1cc�MJA���3@*oG8-cc���n���3@ �^)cc���3��3@J$��(cc��aۢ�3@�G��'cc�ۿ�Ҥ�3@S�1�#cc�[Υ���3@�� h"cc�1�����3@+�3!cc������3@Ǜ�cc�뫫��3@EdX�cc���Q,��3@�Pkcc�$����3@�Z��cc�\Z
�{�3@BC�cc�ꕲq�3@��Vcc��p�Qe�3@��"cc��HP�3@l>�
cc�1��PN�3@㈵�cc�� K�3@㈵�cc��2��A�3@㈵�cc���?�3@��"cc�� �8�3@���cc�ܝ��.�3@��V cc���ME*�3@��occ�
L�u�3@:̗cc����3@�qm�cc�\U�]�3@���cc�W����3@@h=|cc���| �3@4�9Acc�_B��3@-�}cc�b/��3@ӈ�}cc�Y���.�3@%�s}cc��v�1�3@�9�!cc���X32�3@��%cc���
�:�3@��.%cc���
�:�3@�Z&cc���
�:�3@n��)cc��;�2�3@�yq�+cc�>v()�3@0,�-cc����n,�3@֬3�/cc��U�3�3@�&�0cc�g&�5�3@Ly 2cc�g&�5�3@�P295cc��?�0�3@���6cc���q�&�3@O��:7cc�^�pX�3@�;��7cc����i�3@N���8cc�h�ej�3@��|#:cc��+ٱ�3@�H�>cc��i�:�3@P�L�?cc��i�:�3@2���@cc�^�pX�3@����@cc��ĭ��3@����Acc��ĭ��3@rl=Ccc�Gx$�3@�>$Dcc��;�%�3@�OEcc��t�(%�3@6#��Ecc����!�3@�!��Fcc��i�:�3@���{Gcc����3@j0
�Gcc����
�3@���>Hcc�?e��3@�$�Icc���2��3@�TN{Jcc�zo�3@�SW>Kcc��F�q��3@PR`Lcc������3@eMcc��F�q��3@75�|Ncc��}���3@
��Pcc������3@��g?Rcc������3@��Ucc�l�����3@�EVcc�����3@�HVcc�����3@ failed
---------------------------------------------------------------------------
ArrowException Traceback (most recent call last)
/opt/conda/envs/rapids/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in _repr_html_(self)
1787 def _repr_html_(self):
1788 lines = (
-> 1789 self._get_renderable_dataframe()
1790 .to_pandas()
1791 ._repr_html_()
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in to_pandas(self, nullable, **kwargs)
4332
4333 for i, col_key in enumerate(self._data):
-> 4334 out_data[i] = self._data[col_key].to_pandas(
4335 index=out_index, nullable=nullable
4336 )
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/column/string.py in to_pandas(self, index, nullable, **kwargs)
5290 pd_series = pd.Series(pandas_array, copy=False)
5291 else:
-> 5292 pd_series = self.to_arrow().to_pandas(**kwargs)
5293
5294 if index is not None:
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._PandasConvertible.to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib.Array._to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._array_like_to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
ArrowException: Unknown error: Wrapping _�HVcc�Υ����3@!��Vcc�b�{���3@��KqUcc��v�1�3@�1ZGUcc��&k�C�3@oc�#Ucc���}8H�3@��hUcc����I�3@F���Tcc�z�W�3@��~�Tcc�M֨�h�3@� 1�Tcc��,D��3@L�uTcc��{��3@�qScc�$�&ݖ�3@`!sePcc�`�|x��3@%#gaOcc���g��3@&W��Mcc���g��3@4w�Icc���g��3@('�UHcc���g��3@�k��Fcc��M)���3@`��MEcc�1�*��3@&:�,Bcc�1�*��3@�~��@cc�1�*��3@��u?cc���Ք�3@��?�:cc�%���3@*�-9cc��@�ȓ�3@,g~5cc��@�ȓ�3@p��e1cc�MJA���3@*oG8-cc���n���3@ �^)cc���3��3@J$��(cc��aۢ�3@�G��'cc�ۿ�Ҥ�3@S�1�#cc�[Υ���3@�� h"cc�1�����3@+�3!cc������3@Ǜ�cc�뫫��3@EdX�cc���Q,��3@�Pkcc�$����3@�Z��cc�\Z
�{�3@BC�cc�ꕲq�3@��Vcc��p�Qe�3@��"cc��HP�3@l>�
cc�1��PN�3@㈵�cc�� K�3@㈵�cc��2��A�3@㈵�cc���?�3@��"cc�� �8�3@���cc�ܝ��.�3@��V cc���ME*�3@��occ�
L�u�3@:̗cc����3@�qm�cc�\U�]�3@���cc�W����3@@h=|cc���| �3@4�9Acc�_B��3@-�}cc�b/��3@ӈ�}cc�Y���.�3@%�s}cc��v�1�3@�9�!cc���X32�3@��%cc���
�:�3@��.%cc���
�:�3@�Z&cc���
�:�3@n��)cc��;�2�3@�yq�+cc�>v()�3@0,�-cc����n,�3@֬3�/cc��U�3�3@�&�0cc�g&�5�3@Ly 2cc�g&�5�3@�P295cc��?�0�3@���6cc���q�&�3@O��:7cc�^�pX�3@�;��7cc����i�3@N���8cc�h�ej�3@��|#:cc��+ٱ�3@�H�>cc��i�:�3@P�L�?cc��i�:�3@2���@cc�^�pX�3@����@cc��ĭ��3@����Acc��ĭ��3@rl=Ccc�Gx$�3@�>$Dcc��;�%�3@�OEcc��t�(%�3@6#��Ecc����!�3@�!��Fcc��i�:�3@���{Gcc����3@j0
�Gcc����
�3@���>Hcc�?e��3@�$�Icc���2��3@�TN{Jcc�zo�3@�SW>Kcc��F�q��3@PR`Lcc������3@eMcc��F�q��3@75�|Ncc��}���3@
��Pcc������3@��g?Rcc������3@��Ucc�l�����3@�EVcc�����3@�HVcc�����3@ failed
---------------------------------------------------------------------------
ArrowException Traceback (most recent call last)
/opt/conda/envs/rapids/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in _repr_latex_(self)
1803 @_cudf_nvtx_annotate
1804 def _repr_latex_(self):
-> 1805 return self._get_renderable_dataframe().to_pandas()._repr_latex_()
1806
1807 @_cudf_nvtx_annotate
/opt/conda/envs/rapids/lib/python3.8/contextlib.py in inner(*args, **kwds)
73 def inner(*args, **kwds):
74 with self._recreate_cm():
---> 75 return func(*args, **kwds)
76 return inner
77
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/dataframe.py in to_pandas(self, nullable, **kwargs)
4332
4333 for i, col_key in enumerate(self._data):
-> 4334 out_data[i] = self._data[col_key].to_pandas(
4335 index=out_index, nullable=nullable
4336 )
/opt/conda/envs/rapids/lib/python3.8/site-packages/cudf/core/column/string.py in to_pandas(self, index, nullable, **kwargs)
5290 pd_series = pd.Series(pandas_array, copy=False)
5291 else:
-> 5292 pd_series = self.to_arrow().to_pandas(**kwargs)
5293
5294 if index is not None:
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._PandasConvertible.to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib.Array._to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._array_like_to_pandas()
/opt/conda/envs/rapids/lib/python3.8/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
ArrowException: Unknown error: Wrapping _�HVcc�Υ����3@!��Vcc�b�{���3@��KqUcc��v�1�3@�1ZGUcc��&k�C�3@oc�#Ucc���}8H�3@��hUcc����I�3@F���Tcc�z�W�3@��~�Tcc�M֨�h�3@� 1�Tcc��,D��3@L�uTcc��{��3@�qScc�$�&ݖ�3@`!sePcc�`�|x��3@%#gaOcc���g��3@&W��Mcc���g��3@4w�Icc���g��3@('�UHcc���g��3@�k��Fcc��M)���3@`��MEcc�1�*��3@&:�,Bcc�1�*��3@�~��@cc�1�*��3@��u?cc���Ք�3@��?�:cc�%���3@*�-9cc��@�ȓ�3@,g~5cc��@�ȓ�3@p��e1cc�MJA���3@*oG8-cc���n���3@ �^)cc���3��3@J$��(cc��aۢ�3@�G��'cc�ۿ�Ҥ�3@S�1�#cc�[Υ���3@�� h"cc�1�����3@+�3!cc������3@Ǜ�cc�뫫��3@EdX�cc���Q,��3@�Pkcc�$����3@�Z��cc�\Z
�{�3@BC�cc�ꕲq�3@��Vcc��p�Qe�3@��"cc��HP�3@l>�
cc�1��PN�3@㈵�cc�� K�3@㈵�cc��2��A�3@㈵�cc���?�3@��"cc�� �8�3@���cc�ܝ��.�3@��V cc���ME*�3@��occ�
L�u�3@:̗cc����3@�qm�cc�\U�]�3@���cc�W����3@@h=|cc���| �3@4�9Acc�_B��3@-�}cc�b/��3@ӈ�}cc�Y���.�3@%�s}cc��v�1�3@�9�!cc���X32�3@��%cc���
�:�3@��.%cc���
�:�3@�Z&cc���
�:�3@n��)cc��;�2�3@�yq�+cc�>v()�3@0,�-cc����n,�3@֬3�/cc��U�3�3@�&�0cc�g&�5�3@Ly 2cc�g&�5�3@�P295cc��?�0�3@���6cc���q�&�3@O��:7cc�^�pX�3@�;��7cc����i�3@N���8cc�h�ej�3@��|#:cc��+ٱ�3@�H�>cc��i�:�3@P�L�?cc��i�:�3@2���@cc�^�pX�3@����@cc��ĭ��3@����Acc��ĭ��3@rl=Ccc�Gx$�3@�>$Dcc��;�%�3@�OEcc��t�(%�3@6#��Ecc����!�3@�!��Fcc��i�:�3@���{Gcc����3@j0
�Gcc����
�3@���>Hcc�?e��3@�$�Icc���2��3@�TN{Jcc�zo�3@�SW>Kcc��F�q��3@PR`Lcc������3@eMcc��F�q��3@75�|Ncc��}���3@
��Pcc������3@��g?Rcc������3@��Ucc�l�����3@�EVcc�����3@�HVcc�����3@ failed
Additional context Add any other context, code examples, or references to existing implementations about the feature request here.
Can you do gdf.columns
after gdf = cudf.read_parquet(file.parquet)
?
This issue has been labeled inactive-30d
due to no recent activity in the past 30 days. Please close this issue if no further response or action is needed. Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. This issue will be labeled inactive-90d
if there is no activity in the next 60 days.