buckaroo
buckaroo copied to clipboard
Start using Generic types
Checks
- [x] I have checked that this enhancement has not already been requested
How would you categorize this request. You can select multiple if not sure
Developer ergonomics (defaults, error messages)
Enhancement Description
Typing DFStats, DataFlow, and BuckarooWidget is difficult because they all depend on implicit linkages.
Most of DataFlow and BuckarooWidget is agnostic to the dataframe library. The polars and pandas implementations of DFStats exist specifically to smooth over type incompatibility.
This pseudocode is a very good step towards applying generics to fix the problem while maintaining type safety
Pseudo Code Implementation
from typing_extensions import override
import pandas as pd
import polars as pl
from typing import Generic, Type, TypeVar, List
# Type variable standing for the concrete dataframe type (e.g. pd.DataFrame).
# The name is passed positionally, which is the form type checkers recognize.
DFType = TypeVar('DFType')


class WorksOnDFBase(Generic[DFType]):
    """Base for helper classes that operate on one specific dataframe type.

    Subclasses bind ``DFType`` to a concrete dataframe class and override
    ``get_df_columns`` for it.
    """

    @classmethod
    def get_df_columns(cls, df: DFType) -> List[str]:
        """Return the column names of ``df`` as strings.

        Raises:
            NotImplementedError: always, in this base class; subclasses must
                override. Raising keeps the declared ``List[str]`` return
                type honest instead of silently returning ``None``.
        """
        raise NotImplementedError(
            f"{cls.__name__} must override get_df_columns"
        )
class Base(Generic[DFType]):
    """Holds a dataframe together with the helper class that inspects it.

    Subclasses bind ``DFType`` to a concrete dataframe type and assign ``foo``
    a ``WorksOnDFBase`` subclass parameterized with that same type.
    """

    # The dataframe passed to the constructor.
    internal_df: DFType
    # Helper class; annotated with the same DFType as this class so that a
    # mismatched helper is a typing error.
    foo: Type[WorksOnDFBase[DFType]]

    def __init__(self, df: DFType) -> None:
        """Store ``df`` on the instance as ``internal_df``."""
        self.internal_df = df
class WorksOnPolars(WorksOnDFBase[pl.DataFrame]):
    """``WorksOnDFBase`` implementation bound to ``pl.DataFrame``."""

    @override
    @classmethod
    def get_df_columns(cls, df: pl.DataFrame) -> List[str]:
        """Return the column names of ``df``, each converted with ``str``."""
        # df is annotated explicitly: an unannotated parameter is treated as
        # Any, which would bypass the check this class exists to enable.
        return [str(col) for col in df.columns]
class WorksOnPandas(WorksOnDFBase[pd.DataFrame]):
    """``WorksOnDFBase`` implementation bound to ``pd.DataFrame``."""

    @override
    @classmethod
    def get_df_columns(cls, df: pd.DataFrame) -> List[str]:
        """Return the column labels of ``df``, each converted with ``str``."""
        return [str(col) for col in df.columns]
# This should work: the helper and the base are both bound to pd.DataFrame.
class ExtendsBase(Base[pd.DataFrame]):
    foo = WorksOnPandas
# This is expected to fail type checking because WorksOnPolars is
# incompatible with pd.DataFrame. Kept deliberately as the negative example.
class ExtendsBase2(Base[pd.DataFrame]):
    foo = WorksOnPolars
# This works: the helper and the base are both bound to pl.DataFrame.
class ExtendsBase3(Base[pl.DataFrame]):
    foo = WorksOnPolars
Prior Art
n/a
Here's a better version that passes mypy
from abc import ABC, abstractmethod
from typing_extensions import override
import pandas as pd
import polars as pl
from typing import Generic, Type, TypeVar, List
# Type variable standing for the concrete dataframe type (e.g. pd.DataFrame).
DFType = TypeVar('DFType')


class WorksOnDFBase(Generic[DFType], ABC):
    """Abstract base for helper classes tied to one dataframe type.

    Subclasses bind ``DFType`` to a concrete dataframe class and must
    implement ``get_df_columns`` for it.
    """

    # abstractmethod must be the innermost decorator: applying it on top of a
    # classmethod object fails because that object's __isabstractmethod__
    # attribute is read-only.
    @classmethod
    @abstractmethod
    def get_df_columns(cls, df: DFType) -> List[str]:
        """Return the column names of ``df`` as strings.

        Abstract; concrete subclasses provide the implementation.
        """
class Base(Generic[DFType]):
    """Holds a dataframe together with the helper class that inspects it.

    Subclasses bind ``DFType`` to a concrete dataframe type and assign ``foo``
    a ``WorksOnDFBase`` subclass parameterized with that same type.
    """

    # The dataframe passed to the constructor.
    internal_df: DFType
    # Helper class; annotated with the same DFType as this class so that a
    # mismatched helper is a typing error.
    foo: Type[WorksOnDFBase[DFType]]

    # Not marked @abstractmethod: this class does not use ABCMeta, so the
    # marker would not be enforced, and the subclasses in this example
    # inherit this constructor unchanged.
    def __init__(self, df: DFType) -> None:
        """Store ``df`` on the instance as ``internal_df``."""
        self.internal_df = df

    def get_columns(self, df: DFType) -> List[str]:
        """Return the column names of ``df`` by delegating to ``foo``.

        Note that the ``df`` argument is inspected, not ``self.internal_df``.
        """
        return self.foo.get_df_columns(df)
class WorksOnPolars(WorksOnDFBase[pl.DataFrame]):
    """``WorksOnDFBase`` implementation bound to ``pl.DataFrame``."""

    @override
    @classmethod
    def get_df_columns(cls, df: pl.DataFrame) -> List[str]:
        """Return the column names of ``df``, each converted with ``str``."""
        # df is annotated explicitly: an unannotated parameter is treated as
        # Any, which would bypass the check this class exists to enable.
        return [str(col) for col in df.columns]
class WorksOnPandas(WorksOnDFBase[pd.DataFrame]):
    """``WorksOnDFBase`` implementation bound to ``pd.DataFrame``."""

    @override
    @classmethod
    def get_df_columns(cls, df: pd.DataFrame) -> List[str]:
        """Return the column labels of ``df``, each converted with ``str``."""
        return [str(col) for col in df.columns]
# This should work: the helper and the base are both bound to pd.DataFrame.
class ExtendsBase(Base[pd.DataFrame]):
    foo = WorksOnPandas
# #this fails because WorksOnPolars is incompatible with pd.DataFrame
# class ExtendsBase2(Base[pd.DataFrame]):
# foo = WorksOnPolars
# This works: the helper and the base are both bound to pl.DataFrame.
class ExtendsBase3(Base[pl.DataFrame]):
    foo = WorksOnPolars