pygraphistry
pygraphistry copied to clipboard
[BUG] hackernews demo fails on merge branch
On http://localhost/notebook/lab/tree/demos/ai/Introduction/Ask-HackerNews-Demo.ipynb:
File /opt/conda/envs/rapids/lib/python3.8/site-packages/graphistry/feature_utils.py:652, in impute_and_scale_df(df, use_scaler, impute, n_quantiles, output_distribution, quantile_range, n_bins, encode, strategy, keep_n_decimals)
629 def impute_and_scale_df(
630 df: pd.DataFrame,
631 use_scaler: str = "robust",
(...)
639 keep_n_decimals: int = 5,
640 ) -> Tuple[pd.DataFrame, Pipeline]:
642 transformer = get_preprocessing_pipeline(
643 impute=impute,
644 use_scaler=use_scaler,
(...)
650 strategy=strategy,
651 )
--> 652 res = fit_pipeline(df, transformer, keep_n_decimals=keep_n_decimals)
654 return res, transformer
File /opt/conda/envs/rapids/lib/python3.8/site-packages/graphistry/feature_utils.py:622, in fit_pipeline(X, transformer, keep_n_decimals)
619 columns = X.columns
620 index = X.index
--> 622 X = transformer.fit_transform(X)
623 if keep_n_decimals:
624 X = np.round(X, decimals=keep_n_decimals) # type: ignore # noqa
File /opt/conda/envs/rapids/lib/python3.8/site-packages/sklearn/pipeline.py:437, in Pipeline.fit_transform(self, X, y, **fit_params)
410 """Fit the model and transform with the final estimator.
411
412 Fits all the transformers one after the other and transform the
(...)
434 Transformed samples.
435 """
436 fit_params_steps = self._check_fit_params(**fit_params)
--> 437 Xt = self._fit(X, y, **fit_params_steps)
439 last_step = self._final_estimator
440 with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
File /opt/conda/envs/rapids/lib/python3.8/site-packages/sklearn/pipeline.py:339, in Pipeline._fit(self, X, y, **fit_params_steps)
336 def _fit(self, X, y=None, **fit_params_steps):
337 # shallow copy of steps - this should really be steps_
338 self.steps = list(self.steps)
--> 339 self._validate_steps()
340 # Setup the memory
341 memory = check_memory(self.memory)
File /opt/conda/envs/rapids/lib/python3.8/site-packages/sklearn/pipeline.py:243, in Pipeline._validate_steps(self)
237 # We allow last estimator to be None as an identity transformation
238 if (
239 estimator is not None
240 and estimator != "passthrough"
241 and not hasattr(estimator, "fit")
242 ):
--> 243 raise TypeError(
244 "Last step of Pipeline should implement fit "
245 "or be the string 'passthrough'. "
246 "'%s' (type %s) doesn't" % (estimator, type(estimator))
247 )
TypeError: Last step of Pipeline should implement fit or be the string 'passthrough'. '<function identity at 0x7fc7b4870430>' (type <class 'function'>) doesn't
As with the other semantic search cases, I am now also getting:
File /opt/conda/envs/rapids/lib/python3.8/site-packages/pandas/io/excel/_base.py:1656, in ExcelFile.__init__(self, path_or_buffer, engine, storage_options)
1652 ext = inspect_excel_format(
1653 content_or_path=path_or_buffer, storage_options=storage_options
1654 )
1655 if ext is None:
-> 1656 raise ValueError(
1657 "Excel file format cannot be determined, you must specify "
1658 "an engine manually."
1659 )
1661 engine = config.get_option(f"io.excel.{ext}.reader", silent=True)
1662 if engine == "auto":
ValueError: Excel file format cannot be determined, you must specify an engine manually.
so semantic search may have an issue?
@tanmoyio search is still problematic on the HN demo, even for 0.29.1 (g distro, so with RAPIDS):

I'm getting a lot of:
gr = g2.search_graph('How to create deep learning models', thresh=15, top_n=50, scale=0.25, broader=False)
gr.plot()
=>
**No results found due to empty DataFrame, returning original graph
I wonder if this is a parameterization difference between CPU and GPU, or something deeper?