data-attribute-recommendation-python-sdk
data-attribute-recommendation-python-sdk copied to clipboard
Config proposal (API Simplification)
This is item 3 from #83. @mhaas, what do you think?
import json
class Feature:
    """Base class describing one or more dataset columns of one feature type.

    ``repr`` holds a list of ``{"label": ..., "type": ...}`` dicts, one per
    column. Subclasses override ``featuretype`` to supply the type string.
    """

    def __init__(self, columns: list = None):
        """Store *columns* and build the per-column definitions.

        Args:
            columns: Column names; ``None`` (the default) means no columns.
        """
        # Copy so that neither a shared default nor the caller's list is
        # aliased by this instance.
        self.columns = [] if columns is None else list(columns)
        self.repr = self._get_def()

    @property
    def featuretype(self):
        """Type string recorded for each column; ``None`` on the base class."""
        return None

    def _get_def(self):
        """Return one ``{"label", "type"}`` dict per entry in ``columns``."""
        return [{"label": col, "type": self.featuretype} for col in self.columns]

    def __add__(self, other):
        """Return a new ``Feature`` holding the entries of both operands.

        Neither operand is modified. The result is a plain ``Feature`` because
        it may mix several feature types; its ``repr`` keeps the type each
        column was created with.
        """
        if not isinstance(other, Feature):
            return NotImplemented
        combined = Feature()
        combined.columns = self.columns + other.columns
        # Concatenating builds a new list, so the operands' lists stay intact.
        combined.repr = self.repr + other.repr
        return combined

    def __repr__(self):
        return str(self.repr)
class Category(Feature):
    """Feature whose columns are recorded with type ``"CATEGORY"``."""

    def __init__(self, columns: list = None):
        """Create the feature for *columns* (``None`` means no columns)."""
        # Build a fresh list per call: a literal ``[]`` default would be one
        # object shared by every instance created without arguments.
        super().__init__([] if columns is None else columns)

    @property
    def featuretype(self):
        """Type string written into each column definition."""
        return "CATEGORY"
class Text(Feature):
    """Feature whose columns are recorded with type ``"TEXT"``."""

    def __init__(self, columns: list = None):
        """Create the feature for *columns* (``None`` means no columns)."""
        # Build a fresh list per call: a literal ``[]`` default would be one
        # object shared by every instance created without arguments.
        super().__init__([] if columns is None else columns)

    @property
    def featuretype(self):
        """Type string written into each column definition."""
        return "TEXT"
class Number(Feature):
    """Feature whose columns are recorded with type ``"NUMBER"``."""

    def __init__(self, columns: list = None):
        """Create the feature for *columns* (``None`` means no columns)."""
        # Build a fresh list per call: a literal ``[]`` default would be one
        # object shared by every instance created without arguments.
        super().__init__([] if columns is None else columns)

    @property
    def featuretype(self):
        """Type string written into each column definition."""
        return "NUMBER"
class Schema:
    """JSON description of a dataset schema: features, labels and a name.

    ``repr`` holds the schema serialized as an indented JSON string.
    """

    def __init__(self, features=None, labels=None, name=None):
        """Store the parts and serialize them once.

        Args:
            features: Object whose ``repr`` attribute is the list of feature
                column definitions; ``None`` means no features.
            labels: Object whose ``repr`` attribute is the list of label
                column definitions; ``None`` means no labels.
            name: Schema name, written out as-is.
        """
        self.features = features
        self.labels = labels
        self.name = name
        self.repr = self._get_def()

    def _get_def(self):
        """Return the schema as a JSON string indented by two spaces."""
        # The constructor defaults are None, so an omitted part must serialize
        # as an empty list instead of failing on ``None.repr``.
        feature_defs = [] if self.features is None else self.features.repr
        label_defs = [] if self.labels is None else self.labels.repr
        return json.dumps(
            {
                "features": feature_defs,
                "labels": label_defs,
                "name": self.name,
            },
            indent=2,
        )

    def __repr__(self):
        return str(self.repr)
# Input columns: typed column groups chained with ``+``.
features = (
    Text(["MYTEXT"])
    + Number(["ANumeric"])
    + Category(["Manufacturee", "description"])
)
# Target columns, built the same way.
labels = Category(["Label1", "Label2"]) + Number(["Label3"])
schema = Schema(
    features=features,
    labels=labels,
    name="bestbuy-category-prediction",
)
print(schema)
Result: { "features": [ { "label": "MYTEXT", "type": "TEXT" }, { "label": "ANumeric", "type": "NUMBER" }, { "label": "Manufacturee", "type": "CATEGORY" }, { "label": "description", "type": "CATEGORY" } ], "labels": [ { "label": "Label1", "type": "CATEGORY" }, { "label": "Label2", "type": "CATEGORY" }, { "label": "Label3", "type": "NUMBER" } ], "name": "bestbuy-category-prediction" }
Hi @DBusAI, this is very cool!
Can you open a PR with this change? You can also install pre-commit (`pip install pre-commit`) and run `pre-commit run -a` to automatically format your code according to our standards and also run all the tests.
Perhaps some preliminary feedback: the syntactic sugar you create here by overloading the + operator is nice, but it might be a bit too magic. My main concern here is that Text(['MYTEXT']) + Number(['ANumeric']) would permanently change one object. Perhaps it could return a new object instead?
Thinking about this a bit more, I also like the following, very simple approach:
# One object per column; plain lists take the place of operator chaining.
features = [
    Category("foo"),
    Text("bar"),
]
labels = [
    Category("MyLabel"),
]
schema = Schema(
    features=features,
    labels=labels,
    name="bestbuy-category-prediction",
)
New code version:
class Feature:
    """Base class for a single dataset column of one feature type.

    ``repr`` holds the column definition as a ``{"label", "type"}`` dict,
    computed once at construction time.
    """

    def __init__(self, column: str):
        """Remember the column name and build its definition."""
        self.column = column
        self.repr = self._get_def()

    @property
    def featuretype(self):
        """Type string for the column; the base class supplies ``None``."""
        return None

    def _get_def(self):
        """Return the definition dict for this column."""
        definition = {}
        definition["label"] = self.column
        definition["type"] = self.featuretype
        return definition

    def __repr__(self):
        return f"{self.repr}"
class Category(Feature):
    """Single column recorded with type ``"CATEGORY"``."""

    def __init__(self, column: str):
        """Create the feature for the column named *column*."""
        super().__init__(column)

    @property
    def featuretype(self):
        """Type string written into the column definition."""
        type_name = "CATEGORY"
        return type_name
class Text(Feature):
    """Single column recorded with type ``"TEXT"``."""

    def __init__(self, column: str):
        """Create the feature for the column named *column*."""
        super().__init__(column)

    @property
    def featuretype(self):
        """Type string written into the column definition."""
        type_name = "TEXT"
        return type_name
class Number(Feature):
    """Single column recorded with type ``"NUMBER"``."""

    def __init__(self, column: str):
        """Create the feature for the column named *column*."""
        super().__init__(column)

    @property
    def featuretype(self):
        """Type string written into the column definition."""
        type_name = "NUMBER"
        return type_name
class Schema:
    """Dataset schema assembled from feature and label column objects.

    ``repr`` holds the schema as a plain dict with the keys ``"features"``,
    ``"labels"`` and ``"name"``. Calling the instance returns that dict.
    """

    def __init__(self, features=None, labels=None, name=None):
        """Store the parts and build the schema dict once.

        Args:
            features: Iterable of objects exposing a ``repr`` attribute;
                ``None`` means no features.
            labels: Iterable of objects exposing a ``repr`` attribute;
                ``None`` means no labels.
            name: Schema name, stored as-is.
        """
        self.features = features
        self.labels = labels
        self.name = name
        self.repr = self._get_def()

    def _get_def(self):
        """Return the schema dict built from each item's ``repr``."""
        # The constructor defaults are None, so an omitted part must yield an
        # empty list instead of failing when iterated.
        return {
            "features": [item.repr for item in self.features or []],
            "labels": [item.repr for item in self.labels or []],
            "name": self.name,
        }

    def __repr__(self):
        # __repr__ must return a string; returning the dict itself makes
        # repr() and print() raise TypeError.
        return str(self.repr)

    def __call__(self):
        """Return the schema dict."""
        return self.repr
# Input columns, one typed object per column.
features = [
    Category("manufacturer"),
    Text("description"),
    Number("price"),
]
# Target columns.
labels = [
    Category("level1_category"),
    Category("level2_category"),
    Category("level3_category"),
]
# Calling the Schema instance yields the schema dict itself.
schema = Schema(
    features=features,
    labels=labels,
    name="bestbuy-category-prediction",
)()