pangeo-example-notebooks
pangeo-example-notebooks copied to clipboard
machine-learning.ipynb does not work
Executing the lines
km = dask_ml.cluster.KMeans(n_clusters=3, init_max_iter=2, oversampling_factor=10)
km.fit(X)
results in
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-14-7d3bc4c475f7> in <module>()
1 km = dask_ml.cluster.KMeans(n_clusters=3, init_max_iter=2, oversampling_factor=10)
----> 2 km.fit(X)
/opt/conda/lib/python3.6/site-packages/dask_ml/cluster/k_means.py in fit(self, X, y)
197 max_iter=self.max_iter,
198 init_max_iter=self.init_max_iter,
--> 199 tol=self.tol,
200 )
201 self.cluster_centers_ = centroids
/opt/conda/lib/python3.6/site-packages/dask_ml/cluster/k_means.py in k_means(X, n_clusters, init, precompute_distances, n_init, max_iter, verbose, tol, random_state, copy_x, n_jobs, algorithm, return_n_iter, oversampling_factor, init_max_iter)
268 random_state=random_state,
269 oversampling_factor=oversampling_factor,
--> 270 init_max_iter=init_max_iter,
271 )
272 if return_n_iter:
/opt/conda/lib/python3.6/site-packages/dask_ml/cluster/k_means.py in _kmeans_single_lloyd(X, n_clusters, max_iter, init, verbose, x_squared_norms, random_state, tol, precompute_distances, oversampling_factor, init_max_iter)
550 counts = da.maximum(counts, 1)
551 new_centers = new_centers / counts[:, None]
--> 552 new_centers, = compute(new_centers)
553
554 # Convergence check
/opt/conda/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
400 keys = [x.__dask_keys__() for x in collections]
401 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 402 results = schedule(dsk, keys, **kwargs)
403 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
404
/opt/conda/lib/python3.6/site-packages/distributed/client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, **kwargs)
2191 try:
2192 results = self.gather(packed, asynchronous=asynchronous,
-> 2193 direct=direct)
2194 finally:
2195 for f in futures.values():
/opt/conda/lib/python3.6/site-packages/distributed/client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
1566 return self.sync(self._gather, futures, errors=errors,
1567 direct=direct, local_worker=local_worker,
-> 1568 asynchronous=asynchronous)
1569
1570 @gen.coroutine
/opt/conda/lib/python3.6/site-packages/distributed/client.py in sync(self, func, *args, **kwargs)
651 return future
652 else:
--> 653 return sync(self.loop, func, *args, **kwargs)
654
655 def __repr__(self):
/opt/conda/lib/python3.6/site-packages/distributed/utils.py in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
/opt/conda/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
/opt/conda/lib/python3.6/site-packages/distributed/utils.py in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
/opt/conda/lib/python3.6/site-packages/tornado/gen.py in run(self)
1097
1098 try:
-> 1099 value = future.result()
1100 except Exception:
1101 self.had_exception = True
/opt/conda/lib/python3.6/site-packages/tornado/gen.py in run(self)
1105 if exc_info is not None:
1106 try:
-> 1107 yielded = self.gen.throw(*exc_info)
1108 finally:
1109 # Break up a reference to itself
/opt/conda/lib/python3.6/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1445 six.reraise(type(exception),
1446 exception,
-> 1447 traceback)
1448 if errors == 'skip':
1449 bad_keys.add(key)
/opt/conda/lib/python3.6/site-packages/six.py in reraise(tp, value, tb)
690 value = tp()
691 if value.__traceback__ is not tb:
--> 692 raise value.with_traceback(tb)
693 raise value
694 finally:
/opt/conda/lib/python3.6/site-packages/numba/dispatcher.py in _compile_for_args()
366 e.patch_message(''.join(e.args) + help_msg)
367 # ignore the FULL_TRACEBACKS config, this needs reporting!
--> 368 raise e
369
370 def inspect_llvm(self, signature=None):
/opt/conda/lib/python3.6/site-packages/numba/dispatcher.py in _compile_for_args()
323 argtypes.append(self.typeof_pyval(a))
324 try:
--> 325 return self.compile(tuple(argtypes))
326 except errors.TypingError as e:
327 # Intercept typing error that may be due to an argument
/opt/conda/lib/python3.6/site-packages/numba/dispatcher.py in compile()
651
652 self._cache_misses[sig] += 1
--> 653 cres = self._compiler.compile(args, return_type)
654 self.add_overload(cres)
655 self._cache.save_overload(sig, cres)
/opt/conda/lib/python3.6/site-packages/numba/dispatcher.py in compile()
81 args=args, return_type=return_type,
82 flags=flags, locals=self.locals,
---> 83 pipeline_class=self.pipeline_class)
84 # Check typing error if object mode is used
85 if cres.typing_error is not None and not flags.enable_pyobject:
/opt/conda/lib/python3.6/site-packages/numba/compiler.py in compile_extra()
871 pipeline = pipeline_class(typingctx, targetctx, library,
872 args, return_type, flags, locals)
--> 873 return pipeline.compile_extra(func)
874
875
/opt/conda/lib/python3.6/site-packages/numba/compiler.py in compile_extra()
365 self.lifted = ()
366 self.lifted_from = None
--> 367 return self._compile_bytecode()
368
369 def compile_ir(self, func_ir, lifted=(), lifted_from=None):
/opt/conda/lib/python3.6/site-packages/numba/compiler.py in _compile_bytecode()
802 """
803 assert self.func_ir is None
--> 804 return self._compile_core()
805
806 def _compile_ir(self):
/opt/conda/lib/python3.6/site-packages/numba/compiler.py in _compile_core()
789 self.define_pipelines(pm)
790 pm.finalize()
--> 791 res = pm.run(self.status)
792 if res is not None:
793 # Early pipeline completion
/opt/conda/lib/python3.6/site-packages/numba/compiler.py in run()
251 # No more fallback pipelines?
252 if is_final_pipeline:
--> 253 raise patched_exception
254 # Go to next fallback pipeline
255 else:
/opt/conda/lib/python3.6/site-packages/numba/compiler.py in run()
243 try:
244 event(stage_name)
--> 245 stage()
246 except _EarlyPipelineCompletion as e:
247 return e.result
/opt/conda/lib/python3.6/site-packages/numba/compiler.py in stage_generic_rewrites()
478 with self.fallback_context(msg):
479 rewrites.rewrite_registry.apply('before-inference',
--> 480 self, self.func_ir)
481
482 def stage_nopython_rewrites(self):
/opt/conda/lib/python3.6/site-packages/numba/rewrites/registry.py in apply()
68 key, block = work_list.pop()
69 matches = rewrite.match(func_ir, block, pipeline.typemap,
---> 70 pipeline.calltypes)
71 if matches:
72 if config.DEBUG or config.DUMP_IR:
/opt/conda/lib/python3.6/site-packages/numba/rewrites/static_getitem.py in match()
19 if expr.op == 'getitem':
20 try:
---> 21 const = func_ir.infer_constant(expr.index)
22 except errors.ConstantInferenceError:
23 continue
/opt/conda/lib/python3.6/site-packages/numba/ir.py in infer_constant()
962 if isinstance(name, Var):
963 name = name.name
--> 964 return self._consts.infer_constant(name)
965
966 def get_definition(self, value, lhs_only=False):
/opt/conda/lib/python3.6/site-packages/numba/consts.py in infer_constant()
32 if name not in self._cache:
33 try:
---> 34 self._cache[name] = (True, self._do_infer(name))
35 except ConstantInferenceError as exc:
36 # Store the exception args only, to avoid keeping
/opt/conda/lib/python3.6/site-packages/numba/consts.py in _do_infer()
58 "no single definition for %r" % (name,))
59 try:
---> 60 const = defn.infer_constant()
61 except ConstantInferenceError:
62 if isinstance(defn, ir.Expr):
/opt/conda/lib/python3.6/site-packages/numba/ir.py in infer_constant()
344
345 def infer_constant(self):
--> 346 raise ConstantInferenceError('%s' % self, loc=self.loc)
347
348
/opt/conda/lib/python3.6/site-packages/numba/errors.py in __init__()
526 self.value = value
527 msg = "Cannot make a constant from: %s" % value
--> 528 super(ConstantInferenceError, self).__init__(msg, loc=loc)
529
530
/opt/conda/lib/python3.6/site-packages/numba/errors.py in __init__()
386 if loc:
387 super(NumbaError, self).__init__(
--> 388 highlight("%s\n%s\n" % (msg, loc.strformat())))
389 else:
390 super(NumbaError, self).__init__(highlight("%s" % (msg,)))
/opt/conda/lib/python3.6/site-packages/numba/ir.py in strformat()
85
86 ret.extend(selected[:-1])
---> 87 ret.append(_termcolor.highlight(selected[-1]))
88
89 # point at the problem with a caret
IndexError: Failed at nopython (nopython rewrites)
list index out of range
Also, the imports raise the following error:
ImportError Traceback (most recent call last)
<ipython-input-3-4af4d06194bc> in <module>()
----> 1 import dask_ml.joblib # register the distriubted backend
2 from sklearn.datasets import make_classification
3 from sklearn.svm import SVC
4 from sklearn.model_selection import GridSearchCV
5 import pandas as pd
/srv/conda/lib/python3.6/site-packages/dask_ml/joblib.py in <module>()
----> 1 import distributed.joblib # noqa
/srv/conda/lib/python3.6/site-packages/distributed/joblib.py in <module>()
17
18
---> 19 raise ImportError(msg)
ImportError: It is no longer necessary to `import dask_ml.joblib` or
`import distributed.joblib`.
This functionality has moved into the core Joblib codebase.
To use Joblib's Dask backend with Scikit-Learn >= 0.20.0
from dask.distributed import Client
client = Client()
from sklearn.externals import joblib
with joblib.parallel_backend('dask'):
# your scikit-learn code
See http://ml.dask.org/joblib.html for more information.
@anderl80 I am able to execute the entire notebook. Try putting `import dask_ml.joblib` in its own cell.
You no longer need the joblib import (joblib/sklearn upstreamed the dask functionality).
Probably, though, we should just remove the notebook from this repository. The Pangeo community shouldn't have to deal with this maintenance burden.
On Fri, Dec 21, 2018 at 3:45 AM Robin [email protected] wrote:
@anderl80 https://github.com/anderl80 I am able to execute the entire notebook
— You are receiving this because you are subscribed to this thread. Reply to this email directly, view it on GitHub https://github.com/pangeo-data/pangeo-example-notebooks/issues/27#issuecomment-449367984, or mute the thread https://github.com/notifications/unsubscribe-auth/AASszAb0Ts6d4Ptd1yH0UIsbcW0W1D8gks5u7Mn2gaJpZM4Yn5Y8 .