enstop
enstop copied to clipboard
FloatingPointError in NMF
I can't run the NMF algorithm. When I run:
%%time
nmf_model = NMF(n_components=20, beta_loss='kullback-leibler', solver='mu').fit(data)
... I see the following stack trace:
---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
<timed exec> in <module>
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in fit(self, X, y, **params)
1310 self
1311 """
-> 1312 self.fit_transform(X, **params)
1313 return self
1314
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in fit_transform(self, X, y, W, H)
1285 l1_ratio=self.l1_ratio, regularization='both',
1286 random_state=self.random_state, verbose=self.verbose,
-> 1287 shuffle=self.shuffle)
1288
1289 self.reconstruction_err_ = _beta_divergence(X, W, H, self.beta_loss,
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in non_negative_factorization(X, W, H, n_components, init, update_H, solver, beta_loss, tol, max_iter, alpha, l1_ratio, regularization, random_state, verbose, shuffle)
1067 tol, l1_reg_W, l1_reg_H,
1068 l2_reg_W, l2_reg_H, update_H,
-> 1069 verbose)
1070
1071 else:
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in _fit_multiplicative_update(X, W, H, beta_loss, max_iter, tol, l1_reg_W, l1_reg_H, l2_reg_W, l2_reg_H, update_H, verbose)
810 if update_H:
811 delta_H = _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H,
--> 812 l2_reg_H, gamma)
813 H *= delta_H
814
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in _multiplicative_update_h(X, W, H, beta_loss, l1_reg_H, l2_reg_H, gamma)
634 else:
635 # Numerator
--> 636 WH_safe_X = _special_sparse_dot(W, H, X)
637 if sp.issparse(X):
638 WH_safe_X_data = WH_safe_X.data
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_nmf.py in _special_sparse_dot(W, H, X)
178 batch = slice(start, start + batch_size)
179 dot_vals[batch] = np.multiply(W[ii[batch], :],
--> 180 H.T[jj[batch], :]).sum(axis=1)
181
182 WH = sp.coo_matrix((dot_vals, (ii, jj)), shape=X.shape)
FloatingPointError: underflow encountered in multiply
I also get the same error with LatentDirichletAllocation when I choose 448 clusters (n_components=448) for 25000 rows:
%%time
lda_model = LatentDirichletAllocation(n_components=448).fit(data_vec)
---------------------------------------------------------------------------
FloatingPointError Traceback (most recent call last)
<timed exec> in <module>
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_online_lda.py in fit(self, X, y)
566 # batch update
567 self._em_step(X, total_samples=n_samples,
--> 568 batch_update=True, parallel=parallel)
569
570 # check perplexity
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_online_lda.py in _em_step(self, X, total_samples, batch_update, parallel)
446 # E-step
447 _, suff_stats = self._e_step(X, cal_sstats=True, random_init=True,
--> 448 parallel=parallel)
449
450 # M-step
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_online_lda.py in _e_step(self, X, cal_sstats, random_init, parallel)
399 self.mean_change_tol, cal_sstats,
400 random_state)
--> 401 for idx_slice in gen_even_slices(X.shape[0], n_jobs))
402
403 # merge result
d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1001 # remaining jobs.
1002 self._iterating = False
-> 1003 if self.dispatch_one_batch(iterator):
1004 self._iterating = self._original_iterator is not None
1005
d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
832 return False
833 else:
--> 834 self._dispatch(tasks)
835 return True
836
d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
751 with self._lock:
752 job_idx = len(self._jobs)
--> 753 job = self._backend.apply_async(batch, callback=cb)
754 # A job can complete so quickly than its callback is
755 # called before we get here, causing self._jobs to
d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
199 def apply_async(self, func, callback=None):
200 """Schedule a func to be run"""
--> 201 result = ImmediateResult(func)
202 if callback:
203 callback(result)
d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
580 # Don't delay the application, to avoid keeping the input
581 # arguments in memory
--> 582 self.results = batch()
583
584 def get(self):
d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in __call__(self)
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
255 return [func(*args, **kwargs)
--> 256 for func, args, kwargs in self.items]
257
258 def __len__(self):
d:\pycharmprojects\biclustering\venv\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
255 return [func(*args, **kwargs)
--> 256 for func, args, kwargs in self.items]
257
258 def __len__(self):
d:\pycharmprojects\biclustering\venv\lib\site-packages\sklearn\decomposition\_online_lda.py in _update_doc_distribution(X, exp_topic_word_distr, doc_topic_prior, max_iters, mean_change_tol, cal_sstats, random_state)
115
116 doc_topic_d = (exp_doc_topic_d *
--> 117 np.dot(cnts / norm_phi, exp_topic_word_d.T))
118 # Note: adds doc_topic_prior to doc_topic_d, in-place.
119 _dirichlet_expectation_1d(doc_topic_d, doc_topic_prior,
FloatingPointError: underflow encountered in multiply
Could you please help? I am using Python 3.7.5 (x64) on Windows 10.