umap icon indicating copy to clipboard operation
umap copied to clipboard

AttributeError: 'list' object has no attribute 'dtype'

Open NamLQ opened this issue 3 years ago • 0 comments

Hi! I am trying different UMAP hyperparameter values on the MNIST dataset (downloaded from https://www.kaggle.com/datasets/oddrationale/mnist-in-csv). Most combinations run normally, but some specific hyperparameter values fail with the error: AttributeError: 'list' object has no attribute 'dtype'

Any help?

Thanks!

Here is the code to reproduce the error:

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import umap
from umap.umap_ import nearest_neighbors

# Reproduction script: load MNIST from CSV, centre it, reduce it with PCA,
# precompute a k-NN graph, then fit a densMAP-enabled UMAP on the PCA scores.

# dataset from https://www.kaggle.com/datasets/oddrationale/mnist-in-csv
mnist_train = pd.read_csv('mnist/mnist_train.csv')

# Column 0 is kept separately as `target` (presumably the digit label --
# confirm against the CSV header); the remaining columns are the features.
data = mnist_train.values[:, 1:]
target = mnist_train.values[:, 0]

# Drop constant features: keep only columns whose standard deviation is > 0.
std = np.std(data, axis = 0)
data = data[:, std > 0]

# Rescale by 255 (presumably maps 8-bit pixel intensities to [0, 1]).
data = data/255

# with_std=False: no per-feature variance scaling is requested here.
sc = StandardScaler(with_std=False)
data_std = sc.fit_transform(data)

# Fit PCA with n_components=None, then truncate to the leading columns below.
pca = PCA(n_components=None)
data_pca = pca.fit_transform(data_std)

# Keep only the first 153 PCA columns.
data_pca = data_pca[:, :153]

# Precompute the nearest-neighbour structure once with n_neighbors=3000 so it
# can be handed to UMAP through `precomputed_knn`.
precomputed_knn = nearest_neighbors(
        data_pca, n_neighbors = 3000, metric="euclidean",
        metric_kwds=None, angular=False, random_state=1)

# NOTE(review): the reducer asks for n_neighbors=2 while the precomputed k-NN
# above was built with 3000 neighbours.
# NOTE(review): the traceback goes through spectral_layout's
# `n_components > 1` branch into multi_component_layout, i.e. the graph has
# more than one connected component -- presumably a consequence of such a
# small n_neighbors; confirm.
reducer = umap.UMAP(
        learning_rate = 0.1532398922014837,
        n_neighbors = 2,
        min_dist = 0.0068166773845126,
        n_epochs = 371,
        n_components = 2, 
        random_state=2, 
        densmap = True,
        dens_lambda = 7.877380090924162e-05,
        dens_frac = 0.0148902602954258,
        dens_var_shift = 0.0001287917105897,
        precomputed_knn=precomputed_knn)

# about 30 minutes of running code
# This is the call that raises the AttributeError shown in the traceback.
embedding = reducer.fit_transform(data_pca)

And this is the traceback:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Input In [18], in <cell line: 13>()
      1 reducer = umap.UMAP(learning_rate = 0.1532398922014837,
      2                     n_neighbors = 2,
      3                     min_dist =  0.0068166773845126,
   (...)
     10                     dens_var_shift = 0.0001287917105897,
     11                     precomputed_knn=precomputed_knn)
---> 13 embedding = reducer.fit_transform(data_pca)

File ~/anaconda3/lib/python3.9/site-packages/umap/umap_.py:2772, in UMAP.fit_transform(self, X, y)
   2742 def fit_transform(self, X, y=None):
   2743     """Fit X into an embedded space and return that transformed
   2744     output.
   2745 
   (...)
   2770         Local radii of data points in the embedding (log-transformed).
   2771     """
-> 2772     self.fit(X, y)
   2773     if self.transform_mode == "embedding":
   2774         if self.output_dens:

File ~/anaconda3/lib/python3.9/site-packages/umap/umap_.py:2684, in UMAP.fit(self, X, y)
   2681     print(ts(), "Construct embedding")
   2683 if self.transform_mode == "embedding":
-> 2684     self.embedding_, aux_data = self._fit_embed_data(
   2685         self._raw_data[index],
   2686         self.n_epochs,
   2687         init,
   2688         random_state,  # JH why raw data?
   2689     )
   2690     # Assign any points that are fully disconnected from our manifold(s) to have embedding
   2691     # coordinates of np.nan.  These will be filtered by our plotting functions automatically.
   2692     # They also prevent users from being deceived a distance query to one of these points.
   2693     # Might be worth moving this into simplicial_set_embedding or _fit_embed_data
   2694     disconnected_vertices = np.array(self.graph_.sum(axis=1)).flatten() == 0

File ~/anaconda3/lib/python3.9/site-packages/umap/umap_.py:2717, in UMAP._fit_embed_data(self, X, n_epochs, init, random_state)
   2713 def _fit_embed_data(self, X, n_epochs, init, random_state):
   2714     """A method wrapper for simplicial_set_embedding that can be
   2715     replaced by subclasses.
   2716     """
-> 2717     return simplicial_set_embedding(
   2718         X,
   2719         self.graph_,
   2720         self.n_components,
   2721         self._initial_alpha,
   2722         self._a,
   2723         self._b,
   2724         self.repulsion_strength,
   2725         self.negative_sample_rate,
   2726         n_epochs,
   2727         init,
   2728         random_state,
   2729         self._input_distance_func,
   2730         self._metric_kwds,
   2731         self.densmap,
   2732         self._densmap_kwds,
   2733         self.output_dens,
   2734         self._output_distance_func,
   2735         self._output_metric_kwds,
   2736         self.output_metric in ("euclidean", "l2"),
   2737         self.random_state is None,
   2738         self.verbose,
   2739         tqdm_kwds=self.tqdm_kwds,
   2740     )

File ~/anaconda3/lib/python3.9/site-packages/umap/umap_.py:1078, in simplicial_set_embedding(data, graph, n_components, initial_alpha, a, b, gamma, negative_sample_rate, n_epochs, init, random_state, metric, metric_kwds, densmap, densmap_kwds, output_dens, output_metric, output_metric_kwds, euclidean_output, parallel, verbose, tqdm_kwds)
   1073     embedding = random_state.uniform(
   1074         low=-10.0, high=10.0, size=(graph.shape[0], n_components)
   1075     ).astype(np.float32)
   1076 elif isinstance(init, str) and init == "spectral":
   1077     # We add a little noise to avoid local minima for optimization to come
-> 1078     initialisation = spectral_layout(
   1079         data,
   1080         graph,
   1081         n_components,
   1082         random_state,
   1083         metric=metric,
   1084         metric_kwds=metric_kwds,
   1085     )
   1086     expansion = 10.0 / np.abs(initialisation).max()
   1087     embedding = (initialisation * expansion).astype(
   1088         np.float32
   1089     ) + random_state.normal(
   (...)
   1092         np.float32
   1093     )

File ~/anaconda3/lib/python3.9/site-packages/umap/spectral.py:306, in spectral_layout(data, graph, dim, random_state, metric, metric_kwds)
    303 n_components, labels = scipy.sparse.csgraph.connected_components(graph)
    305 if n_components > 1:
--> 306     return multi_component_layout(
    307         data,
    308         graph,
    309         n_components,
    310         labels,
    311         dim,
    312         random_state,
    313         metric=metric,
    314         metric_kwds=metric_kwds,
    315     )
    317 diag_data = np.asarray(graph.sum(axis=0))
    318 # standard Laplacian
    319 # D = scipy.sparse.spdiags(diag_data, 0, graph.shape[0], graph.shape[0])
    320 # L = D - graph
    321 # Normalized Laplacian

File ~/anaconda3/lib/python3.9/site-packages/umap/spectral.py:212, in multi_component_layout(data, graph, n_components, component_labels, dim, random_state, metric, metric_kwds)
    209 component_graph = graph.tocsr()[component_labels == label, :].tocsc()
    210 component_graph = component_graph[:, component_labels == label].tocoo()
--> 212 distances = pairwise_distances([meta_embedding[label]], meta_embedding)
    213 data_range = distances[distances > 0.0].min() / 2.0
    215 if component_graph.shape[0] < 2 * dim or component_graph.shape[0] <= dim + 1:

File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/_device_offload.py:83, in support_usm_ndarray.<locals>.decorator.<locals>.wrapper_free(*args, **kwargs)
     81 @wraps(func)
     82 def wrapper_free(*args, **kwargs):
---> 83     return wrapper_impl(None, *args, **kwargs)

File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/_device_offload.py:74, in support_usm_ndarray.<locals>.decorator.<locals>.wrapper_impl(obj, *args, **kwargs)
     72 usm_iface = _extract_usm_iface(*args, **kwargs)
     73 q, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs)
---> 74 result = _run_on_device(func, q, obj, *hostargs, **hostkwargs)
     75 if usm_iface is not None and hasattr(result, '__array_interface__'):
     76     return _copy_to_usm(q, result)

File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/_device_offload.py:65, in _run_on_device(func, queue, obj, *args, **kwargs)
     62         with sycl_context('gpu' if queue.sycl_device.is_gpu else 'cpu',
     63                           host_offload_on_fail=host_offload):
     64             return dispatch_by_obj(obj, func, *args, **kwargs)
---> 65 return dispatch_by_obj(obj, func, *args, **kwargs)

File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/_device_offload.py:54, in _run_on_device.<locals>.dispatch_by_obj(obj, func, *args, **kwargs)
     52 if obj is not None:
     53     return func(obj, *args, **kwargs)
---> 54 return func(*args, **kwargs)

File ~/anaconda3/lib/python3.9/site-packages/daal4py/sklearn/metrics/_pairwise.py:169, in daal_pairwise_distances(X, Y, metric, n_jobs, force_all_finite, **kwds)
    158     raise ValueError("Unknown metric %s. Valid metrics are %s, or 'precomputed', "
    159                      "or a callable" % (metric, _VALID_METRICS))
    161 _patching_status = PatchingConditionsChain(
    162     "sklearn.metrics.pairwise_distances")
    163 _dal_ready = _patching_status.and_conditions([
    164     (metric == 'cosine' or metric == 'correlation',
    165         f"'{metric}' metric is not supported. "
    166         "Only 'cosine' and 'correlation' metrics are supported."),
    167     (Y is None, "Second feature array is not supported."),
    168     (not issparse(X), "X is sparse. Sparse input is not supported."),
--> 169     (X.dtype == np.float64,
    170         f"{X.dtype} X data type is not supported. Only np.float64 is supported.")
    171 ])
    172 _patching_status.write_log()
    173 if _dal_ready:

AttributeError: 'list' object has no attribute 'dtype'

NamLQ avatar Jul 20 '22 08:07 NamLQ