hamilton
hamilton copied to clipboard
[documentation] show how to get hamilton running on snowpark
Issue by skrawcz
Friday Dec 09, 2022 at 20:45 GMT
Originally opened as https://github.com/stitchfix/hamilton/issues/242
We need to help people get up and running with Hamilton on Snowpark.
Two artifacts to produce:
- hamilton + dbt + snowpark
- hamilton + snowpark
Comment by skrawcz
Thursday Dec 15, 2022 at 07:01 GMT
Status on getting the hello world to run on snowpark:
-- Hamilton "hello world" as a scalar Python UDF.
-- hamilton.zip (the Hamilton library) and my_functions.py (the dataflow
-- definitions) are imported from the user stage (@~).
create or replace function hamilton_hw()
-- table does not seem to work
-- returns Table ( spend float,
--                 signups float,
--                 avg_3wk_spend float,
--                 spend_per_signup float,
--                 spend_zero_mean_unit_variance float)
returns Object
language python
runtime_version = '3.8'
handler = 'main_py'
imports = ('@~/hamilton.zip', '@~/my_functions.py')
packages = ('pandas', 'typing_inspect', 'numpy', 'snowflake-snowpark-python')
as
$$
import pandas as pd
from hamilton import driver
import my_functions


def main_py() -> dict:
    """Run the Hamilton hello-world dataflow and return the result as a dict.

    Returns:
        The output DataFrame converted with ``DataFrame.to_dict()``, i.e.
        ``{column name: {row index: value}}``.
    """
    initial_columns = {  # load from actuals or wherever -- this is our initial data we use as input.
        # Note: these values don't have to be all series, they could be a scalar.
        "signups": pd.Series([1, 10, 50, 100, 200, 400]),
        "spend": pd.Series([10, 10, 20, 40, 40, 50]),
    }
    dr = driver.Driver(initial_columns, my_functions)
    output_columns = [
        "spend",
        "signups",
        "avg_3wk_spend",
        "spend_per_signup",
        "spend_zero_mean_unit_variance",
    ]
    # let's create the dataframe!
    df = dr.execute(output_columns)
    return df.to_dict()
$$;

-- Call the UDF; the whole result comes back as a single OBJECT value.
select hamilton_hw();
{
"avg_3wk_spend": {
"0": NaN,
"1": NaN,
"2": 13.333333333333334,
"3": 23.333333333333332,
"4": 33.333333333333336,
"5": 43.333333333333336
},
"signups": {
"0": 1,
"1": 10,
"2": 50,
"3": 100,
"4": 200,
"5": 400
},
"spend": {
"0": 10,
"1": 10,
"2": 20,
"3": 40,
"4": 40,
"5": 50
},
"spend_per_signup": {
"0": 10,
"1": 1,
"2": 0.4,
"3": 0.4,
"4": 0.2,
"5": 0.125
},
"spend_zero_mean_unit_variance": {
"0": -1.0644053746097524,
"1": -1.0644053746097524,
"2": -0.4838206248226147,
"3": 0.6773488747516607,
"4": 0.6773488747516607,
"5": 1.2579336245387984
}
}
Comment by skrawcz
Thursday Dec 15, 2022 at 07:06 GMT
Putting the driver logic in a module also works:
-- Same UDF, but the handler is main_py inside the staged my_script.py module,
-- so the statement carries no inline `as $$ ... $$` body.
create or replace function hamilton_hw()
    returns Object
    language python
    runtime_version = '3.8'
    handler = 'my_script.main_py'
    imports = (
        '@~/hamilton.zip',
        '@~/my_functions.py',
        '@~/my_script.py'
    )
    packages = (
        'pandas',
        'typing_inspect',
        'numpy',
        'snowflake-snowpark-python'
    )
;
Comment by skrawcz
Thursday Dec 15, 2022 at 22:45 GMT
The same example as a UDTF (user-defined table function):
-- Hamilton "hello world" as a Python UDTF: each row of the output DataFrame
-- becomes one row of the returned table.
create or replace function hamilton_udtf_hw()
returns Table ( index float,
                spend float,
                signups float,
                avg_3wk_spend float,
                spend_per_signup float,
                spend_zero_mean_unit_variance float)
-- returns Object
language python
runtime_version = '3.8'
handler = 'Runner'
imports = ('@~/hamilton.zip', '@~/my_functions.py')
packages = ('pandas', 'typing_inspect', 'numpy', 'snowflake-snowpark-python')
as
$$
from typing import Iterator, Tuple

import pandas as pd
from hamilton import driver
import my_functions


class Runner(object):
    """UDTF handler class; `process` produces the output rows."""

    def __init__(self):
        pass

    def process(self) -> Iterator[Tuple[float, float, float, float, float, float]]:
        """Run the Hamilton dataflow and yield one tuple per DataFrame row.

        Yields:
            ``(index, spend, signups, avg_3wk_spend, spend_per_signup,
            spend_zero_mean_unit_variance)`` -- six values, in the same order
            as the columns declared in ``returns Table``.
        """
        initial_columns = {  # load from actuals or wherever -- this is our initial data we use as input.
            # Note: these values don't have to be all series, they could be a scalar.
            "signups": pd.Series([1, 10, 50, 100, 200, 400]),
            "spend": pd.Series([10, 10, 20, 40, 40, 50]),
        }
        dr = driver.Driver(initial_columns, my_functions)
        output_columns = [
            "spend",
            "signups",
            "avg_3wk_spend",
            "spend_per_signup",
            "spend_zero_mean_unit_variance",
        ]
        # let's create the dataframe!
        df = dr.execute(output_columns)
        for index, row in df.iterrows():
            yield (
                index,
                row.spend,
                row.signups,
                row.avg_3wk_spend,
                row.spend_per_signup,
                row.spend_zero_mean_unit_variance,
            )
$$;

-- Table functions are queried through table(...).
select * from table(hamilton_udtf_hw());
This returns the result as a proper table, with one row per DataFrame row.