umap
umap copied to clipboard
AttributeError: 'list' object has no attribute 'dtype'
Hi!
I am running UMAP on the MNIST dataset (downloaded from https://www.kaggle.com/datasets/oddrationale/mnist-in-csv) with many different hyperparameter values. Most combinations run normally, but some specific hyperparameter values fail. The error is:
AttributeError: 'list' object has no attribute 'dtype'
Any help?
Thanks!
Here is the reproducible code:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import umap
from umap.umap_ import nearest_neighbors
# Dataset: MNIST in CSV format, from
# https://www.kaggle.com/datasets/oddrationale/mnist-in-csv
mnist_train = pd.read_csv('mnist/mnist_train.csv')

# Column 0 is used as the target; the remaining columns are the features.
target = mnist_train.values[:, 0]
data = mnist_train.values[:, 1:]

# Drop the columns whose standard deviation is zero, then divide by 255.
std = np.std(data, axis=0)
data = data[:, std > 0] / 255

# Centre the data without rescaling it to unit variance (with_std=False).
sc = StandardScaler(with_std=False)
data_std = sc.fit_transform(data)

# Fit PCA with all components and keep the first 153 columns of the result.
pca = PCA(n_components=None)
data_pca = pca.fit_transform(data_std)[:, :153]

# Nearest-neighbour search done up front (3000 neighbours) and handed to
# UMAP through `precomputed_knn` below.
precomputed_knn = nearest_neighbors(
    data_pca,
    n_neighbors=3000,
    metric="euclidean",
    metric_kwds=None,
    angular=False,
    random_state=1,
)

# One of the hyperparameter combinations that triggers the error.
umap_params = dict(
    learning_rate=0.1532398922014837,
    n_neighbors=2,
    min_dist=0.0068166773845126,
    n_epochs=371,
    n_components=2,
    random_state=2,
    densmap=True,
    dens_lambda=7.877380090924162e-05,
    dens_frac=0.0148902602954258,
    dens_var_shift=0.0001287917105897,
    precomputed_knn=precomputed_knn,
)
reducer = umap.UMAP(**umap_params)

# Takes about 30 minutes to run; this is the call that raises the
# AttributeError shown in the traceback.
embedding = reducer.fit_transform(data_pca)
And this is the traceback:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Input In [18], in <cell line: 13>()
1 reducer = umap.UMAP(learning_rate = 0.1532398922014837,
2 n_neighbors = 2,
3 min_dist = 0.0068166773845126,
(...)
10 dens_var_shift = 0.0001287917105897,
11 precomputed_knn=precomputed_knn)
---> 13 embedding = reducer.fit_transform(data_pca)
File ~/anaconda3/lib/python3.9/site-packages/umap/umap_.py:2772, in UMAP.fit_transform(self, X, y)
2742 def fit_transform(self, X, y=None):
2743 """Fit X into an embedded space and return that transformed
2744 output.
2745
(...)
2770 Local radii of data points in the embedding (log-transformed).
2771 """
-> 2772 self.fit(X, y)
2773 if self.transform_mode == "embedding":
2774 if self.output_dens:
File ~/anaconda3/lib/python3.9/site-packages/umap/umap_.py:2684, in UMAP.fit(self, X, y)
2681 print(ts(), "Construct embedding")
2683 if self.transform_mode == "embedding":
-> 2684 self.embedding_, aux_data = self._fit_embed_data(
2685 self._raw_data[index],
2686 self.n_epochs,
2687 init,
2688 random_state, # JH why raw data?
2689 )
2690 # Assign any points that are fully disconnected from our manifold(s) to have embedding
2691 # coordinates of np.nan. These will be filtered by our plotting functions automatically.
2692 # They also prevent users from being deceived a distance query to one of these points.
2693 # Might be worth moving this into simplicial_set_embedding or _fit_embed_data
2694 disconnected_vertices = np.array(self.graph_.sum(axis=1)).flatten() == 0
File ~/anaconda3/lib/python3.9/site-packages/umap/umap_.py:2717, in UMAP._fit_embed_data(self, X, n_epochs, init, random_state)
2713 def _fit_embed_data(self, X, n_epochs, init, random_state):
2714 """A method wrapper for simplicial_set_embedding that can be
2715 replaced by subclasses.
2716 """
-> 2717 return simplicial_set_embedding(
2718 X,
2719 self.graph_,
2720 self.n_components,
2721 self._initial_alpha,
2722 self._a,
2723 self._b,
2724 self.repulsion_strength,
2725 self.negative_sample_rate,
2726 n_epochs,
2727 init,
2728 random_state,
2729 self._input_distance_func,
2730 self._metric_kwds,
2731 self.densmap,
2732 self._densmap_kwds,
2733 self.output_dens,
2734 self._output_distance_func,
2735 self._output_metric_kwds,
2736 self.output_metric in ("euclidean", "l2"),
2737 self.random_state is None,
2738 self.verbose,
2739 tqdm_kwds=self.tqdm_kwds,
2740 )
File ~/anaconda3/lib/python3.9/site-packages/umap/umap_.py:1078, in simplicial_set_embedding(data, graph, n_components, initial_alpha, a, b, gamma, negative_sample_rate, n_epochs, init, random_state, metric, metric_kwds, densmap, densmap_kwds, output_dens, output_metric, output_metric_kwds, euclidean_output, parallel, verbose, tqdm_kwds)
1073 embedding = random_state.uniform(
1074 low=-10.0, high=10.0, size=(graph.shape[0], n_components)
1075 ).astype(np.float32)
1076 elif isinstance(init, str) and init == "spectral":
1077 # We add a little noise to avoid local minima for optimization to come
-> 1078 initialisation = spectral_layout(
1079 data,
1080 graph,
1081 n_components,
1082 random_state,
1083 metric=metric,
1084 metric_kwds=metric_kwds,
1085 )
1086 expansion = 10.0 / np.abs(initialisation).max()
1087 embedding = (initialisation * expansion).astype(
1088 np.float32
1089 ) + random_state.normal(
(...)
1092 np.float32
1093 )
File ~/anaconda3/lib/python3.9/site-packages/umap/spectral.py:306, in spectral_layout(data, graph, dim, random_state, metric, metric_kwds)
303 n_components, labels = scipy.sparse.csgraph.connected_components(graph)
305 if n_components > 1:
--> 306 return multi_component_layout(
307 data,
308 graph,
309 n_components,
310 labels,
311 dim,
312 random_state,
313 metric=metric,
314 metric_kwds=metric_kwds,
315 )
317 diag_data = np.asarray(graph.sum(axis=0))
318 # standard Laplacian
319 # D = scipy.sparse.spdiags(diag_data, 0, graph.shape[0], graph.shape[0])
320 # L = D - graph
321 # Normalized Laplacian
File ~/anaconda3/lib/python3.9/site-packages/umap/spectral.py:212, in multi_component_layout(data, graph, n_components, component_labels, dim, random_state, metric, metric_kwds)
209 component_graph = graph.tocsr()[component_labels == label, :].tocsc()
210 component_graph = component_graph[:, component_labels == label].tocoo()
--> 212 distances = pairwise_distances([meta_embedding[label]], meta_embedding)
213 data_range = distances[distances > 0.0].min() / 2.0
215 if component_graph.shape[0] < 2 * dim or component_graph.shape[0] <= dim + 1:
File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/_device_offload.py:83, in support_usm_ndarray.<locals>.decorator.<locals>.wrapper_free(*args, **kwargs)
81 @wraps(func)
82 def wrapper_free(*args, **kwargs):
---> 83 return wrapper_impl(None, *args, **kwargs)
File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/_device_offload.py:74, in support_usm_ndarray.<locals>.decorator.<locals>.wrapper_impl(obj, *args, **kwargs)
72 usm_iface = _extract_usm_iface(*args, **kwargs)
73 q, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs)
---> 74 result = _run_on_device(func, q, obj, *hostargs, **hostkwargs)
75 if usm_iface is not None and hasattr(result, '__array_interface__'):
76 return _copy_to_usm(q, result)
File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/_device_offload.py:65, in _run_on_device(func, queue, obj, *args, **kwargs)
62 with sycl_context('gpu' if queue.sycl_device.is_gpu else 'cpu',
63 host_offload_on_fail=host_offload):
64 return dispatch_by_obj(obj, func, *args, **kwargs)
---> 65 return dispatch_by_obj(obj, func, *args, **kwargs)
File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/_device_offload.py:54, in _run_on_device.<locals>.dispatch_by_obj(obj, func, *args, **kwargs)
52 if obj is not None:
53 return func(obj, *args, **kwargs)
---> 54 return func(*args, **kwargs)
File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/metrics/_pairwise.py:169, in daal_pairwise_distances(X, Y, metric, n_jobs, force_all_finite, **kwds)
158 raise ValueError("Unknown metric %s. Valid metrics are %s, or 'precomputed', "
159 "or a callable" % (metric, _VALID_METRICS))
161 _patching_status = PatchingConditionsChain(
162 "sklearn.metrics.pairwise_distances")
163 _dal_ready = _patching_status.and_conditions([
164 (metric == 'cosine' or metric == 'correlation',
165 f"'{metric}' metric is not supported. "
166 "Only 'cosine' and 'correlation' metrics are supported."),
167 (Y is None, "Second feature array is not supported."),
168 (not issparse(X), "X is sparse. Sparse input is not supported."),
--> 169 (X.dtype == np.float64,
170 f"{X.dtype} X data type is not supported. Only np.float64 is supported.")
171 ])
172 _patching_status.write_log()
173 if _dal_ready:
AttributeError: 'list' object has no attribute 'dtype'