recommenders
recommenders copied to clipboard
[FEATURE] Test benchmark utils
Description
Related to https://github.com/microsoft/recommenders/pull/1831. We want to programmatically test the MovieLens benchmark notebook.
Expected behavior with the suggested feature
Other Comments
branch: miguel/bench_tests
Make sure that each of the following tests can run in its own independent environment, not only in the full one:
- [x]
pytest tests/integration/examples/test_notebooks_python.py::test_benchmark_movielens_cpu --disable-warnings
- [x]
pytest tests/integration/examples/test_notebooks_pyspark.py::test_benchmark_movielens_pyspark --disable-warnings
- [x]
pytest tests/integration/examples/test_notebooks_gpu.py::test_benchmark_movielens_gpu --disable-warnings
Grab the metrics and make sure they are correct:
- [x] CPU
- [x] PySpark
- [x] GPU
pytest tests/integration/examples/test_notebooks_python.py::test_benchmark_movielens_cpu
First error:
E ModuleNotFoundError Traceback (most recent call last)
E /tmp/ipykernel_9566/1622094595.py in <module>
E 20 from recommenders.models.fastai.fastai_utils import hide_fastai_progress_bar
E 21
E ---> 22 from benchmark_utils import *
E 23
E 24 print(f"System version: {sys.version}")
E
E ModuleNotFoundError: No module named 'benchmark_utils'
Fixed in https://github.com/microsoft/recommenders/commit/d6d85bc0e3ec18b00fd796d16f1cd521e0d4f4d8
Could this be a relative import error?
Facing the same issue while trying to run the dataset evaluations on Jupyter Notebook
Error in cornac when installing the CPU environment:
$ pytest tests/integration/examples/test_notebooks_python.py::test_benchmark_movielens_cpu --disable-warnings
================================================================================================ test session starts ================================================================================================
platform linux -- Python 3.7.13, pytest-7.2.0, pluggy-1.0.0
rootdir: /home/hoaphumanoid/notebooks/repos/recommenders
plugins: cov-4.0.0, mock-3.10.0, rerunfailures-10.2, anyio-3.6.2, hypothesis-6.56.3
collected 1 item
tests/integration/examples/test_notebooks_python.py F [100%]
===================================================================================================== FAILURES ======================================================================================================
____________________________________________________________________________ test_benchmark_movielens_cpu[size0-algos0-expected_values0] ____________________________________________________________________________
notebooks = {'als_deep_dive': '/home/hoaphumanoid/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/als_deep_..., 'benchmark_movielens': '/home/hoaphumanoid/notebooks/repos/recommenders/examples/06_benchmarks/movielens.ipynb', ...}
output_notebook = 'output.ipynb', kernel_name = 'python3', size = ['100k'], algos = ['svd', 'sar', 'bpr']
expected_values = {'eval_precision': 0.131601, 'eval_precision2': 0.145599, 'eval_recall': 0.038056, 'eval_recall2': 0.051338}
@pytest.mark.notebooks
@pytest.mark.integration
@pytest.mark.parametrize(
"size, algos, expected_values",
[
(
["100k"],
["svd", "sar", "bpr"],
dict(
eval_precision=0.131601,
eval_recall=0.038056,
eval_precision2=0.145599,
eval_recall2=0.051338,
),
),
],
)
def test_benchmark_movielens_cpu(notebooks, output_notebook, kernel_name, size, algos, expected_values):
notebook_path = notebooks["benchmark_movielens"]
pm.execute_notebook(
notebook_path,
output_notebook,
kernel_name=kernel_name,
> parameters=dict(data_sizes=size, algorithms=algos),
)
tests/integration/examples/test_notebooks_python.py:336:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/anaconda/envs/reco_cpu/lib/python3.7/site-packages/papermill/execute.py:128: in execute_notebook
raise_for_execution_errors(nb, output_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
nb = {'cells': [{'id': '83c8748c', 'cell_type': 'markdown', 'source': '<span style="color:red; font-family:Helvetica Neue, ...end_time': '2022-10-25T10:48:46.658986', 'duration': 3.229345, 'exception': True}}, 'nbformat': 4, 'nbformat_minor': 5}
output_path = 'output.ipynb'
def raise_for_execution_errors(nb, output_path):
"""Assigned parameters into the appropriate place in the input notebook
Parameters
----------
nb : NotebookNode
Executable notebook object
output_path : str
Path to write executed notebook
"""
error = None
for index, cell in enumerate(nb.cells):
if cell.get("outputs") is None:
continue
for output in cell.outputs:
if output.output_type == "error":
if output.ename == "SystemExit" and (output.evalue == "" or output.evalue == "0"):
continue
error = PapermillExecutionError(
cell_index=index,
exec_count=cell.execution_count,
source=cell.source,
ename=output.ename,
evalue=output.evalue,
traceback=output.traceback,
)
break
if error:
# Write notebook back out with the Error Message at the top of the Notebook, and a link to
# the relevant cell (by adding a note just before the failure with an HTML anchor)
error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count)
error_msg_cell = nbformat.v4.new_markdown_cell(error_msg)
error_msg_cell.metadata['tags'] = [ERROR_MARKER_TAG]
error_anchor_cell = nbformat.v4.new_markdown_cell(ERROR_ANCHOR_MSG)
error_anchor_cell.metadata['tags'] = [ERROR_MARKER_TAG]
# put the anchor before the cell with the error, before all the indices change due to the
# heading-prepending
nb.cells.insert(error.cell_index, error_anchor_cell)
nb.cells.insert(0, error_msg_cell)
write_ipynb(nb, output_path)
> raise error
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [2]":
E ---------------------------------------------------------------------------
E ImportError Traceback (most recent call last)
E /tmp/ipykernel_26195/875585600.py in <module>
E 6 import seaborn as sns
E 7 import surprise
E ----> 8 import cornac
E 9
E 10 try:
E
E /anaconda/envs/reco_cpu/lib/python3.7/site-packages/cornac/__init__.py in <module>
E 14 # ============================================================================
E 15
E ---> 16 from . import data
E 17 from . import datasets
E 18 from . import eval_methods
E
E /anaconda/envs/reco_cpu/lib/python3.7/site-packages/cornac/data/__init__.py in <module>
E 16 from .modality import Modality
E 17 from .modality import FeatureModality
E ---> 18 from .text import TextModality, ReviewModality
E 19 from .image import ImageModality
E 20 from .graph import GraphModality
E
E /anaconda/envs/reco_cpu/lib/python3.7/site-packages/cornac/data/text.py in <module>
E 25 from . import FeatureModality
E 26 from .modality import fallback_feature
E ---> 27 from ..utils import normalize
E 28
E 29 __all__ = ['Tokenizer',
E
E /anaconda/envs/reco_cpu/lib/python3.7/site-packages/cornac/utils/__init__.py in <module>
E 14 # ============================================================================
E 15
E ---> 16 from .common import validate_format
E 17 from .common import estimate_batches
E 18 from .common import get_rng
E
E /anaconda/envs/reco_cpu/lib/python3.7/site-packages/cornac/utils/common.py in <module>
E 19 import scipy.sparse as sp
E 20
E ---> 21 from .fast_sparse_funcs import (
E 22 inplace_csr_row_normalize_l1,
E 23 inplace_csr_row_normalize_l2
E
E ImportError: libpython3.7m.so.1.0: cannot open shared object file: No such file or directory
/anaconda/envs/reco_cpu/lib/python3.7/site-packages/papermill/execute.py:232: PapermillExecutionError
Fixed with: `export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/anaconda/envs/reco_cpu/lib/`
I will need to add this to `LD_LIBRARY_PATH` when the environment is activated.
Error in pytest tests/integration/examples/test_notebooks_pyspark.py::test_benchmark_movielens_pyspark --disable-warnings
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [16]":
E ---------------------------------------------------------------------------
E Py4JJavaError Traceback (most recent call last)
E <timed exec> in <module>
E
E /tmp/ipykernel_11524/1119464800.py in <lambda>(params, data)
E 1 trainer = {
E ----> 2 "als": lambda params, data: train_als(params, data),
E 3 "svd": lambda params, data: train_svd(params, data),
E 4 "sar": lambda params, data: train_sar(params, data),
E 5 "fastai": lambda params, data: train_fastai(params, data),
E
E ~/notebooks/repos/recommenders/examples/06_benchmarks/benchmark_utils.py in train_als(params, data)
E 87 symbol = ALS(**params)
E 88 with Timer() as t:
E ---> 89 model = symbol.fit(data)
E 90 return model, t
E 91
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/pyspark/ml/base.py in fit(self, dataset, params)
E 159 return self.copy(params)._fit(dataset)
E 160 else:
E --> 161 return self._fit(dataset)
E 162 else:
E 163 raise TypeError("Params must be either a param map or a list/tuple of param maps, "
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/pyspark/ml/wrapper.py in _fit(self, dataset)
E 333
E 334 def _fit(self, dataset):
E --> 335 java_model = self._fit_java(dataset)
E 336 model = self._create_model(java_model)
E 337 return self._copyValues(model)
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/pyspark/ml/wrapper.py in _fit_java(self, dataset)
E 330 """
E 331 self._transfer_params_to_java()
E --> 332 return self._java_obj.fit(dataset._jdf)
E 333
E 334 def _fit(self, dataset):
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/py4j/java_gateway.py in __call__(self, *args)
E 1320 answer = self.gateway_client.send_command(command)
E 1321 return_value = get_return_value(
E -> 1322 answer, self.gateway_client, self.target_id, self.name)
E 1323
E 1324 for temp_arg in temp_args:
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
E 109 def deco(*a, **kw):
E 110 try:
E --> 111 return f(*a, **kw)
E 112 except py4j.protocol.Py4JJavaError as e:
E 113 converted = convert_exception(e.java_exception)
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
E 326 raise Py4JJavaError(
E 327 "An error occurred while calling {0}{1}{2}.\n".
E --> 328 format(target_id, ".", name), value)
E 329 else:
E 330 raise Py4JError(
E
E Py4JJavaError: An error occurred while calling o61.fit.
E : org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0) (localhost executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
E File "/dsvm/tools/spark/current/python/lib/pyspark.zip/pyspark/worker.py", line 473, in main
E raise Exception(("Python in worker has different version %s than that in " +
E Exception: Python in worker has different version 3.8 than that in driver 3.7, PySpark cannot run with different minor versions. Please check environment variables PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON are correctly set.
Export variables
export PYSPARK_PYTHON=/anaconda/envs/reco_spark/bin/python
export PYSPARK_DRIVER_PYTHON=/anaconda/envs/reco_spark/bin/python
Another error:
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [16]":
E ---------------------------------------------------------------------------
E Py4JJavaError Traceback (most recent call last)
E <timed exec> in <module>
E
E /tmp/ipykernel_12471/3470602583.py in <lambda>(test, predictions, k)
E 7
E 8 ranking_evaluator = {
E ----> 9 "als": lambda test, predictions, k: ranking_metrics_pyspark(test, predictions, k),
E 10 "sar": lambda test, predictions, k: ranking_metrics_python(test, predictions, k),
E 11 "svd": lambda test, predictions, k: ranking_metrics_python(test, predictions, k),
E
E ~/notebooks/repos/recommenders/examples/06_benchmarks/benchmark_utils.py in ranking_metrics_pyspark(test, predictions, k)
E 383 def ranking_metrics_pyspark(test, predictions, k=DEFAULT_K):
E 384 rank_eval = SparkRankingEvaluation(
E --> 385 test, predictions, k=k, relevancy_method="top_k", **COL_DICT
E 386 )
E 387 return {
E
E ~/notebooks/repos/recommenders/recommenders/evaluation/spark_evaluation.py in __init__(self, rating_true, rating_pred, k, relevancy_method, col_user, col_item, col_rating, col_prediction, threshold)
E 282 )
E 283
E --> 284 self._metrics = self._calculate_metrics()
E 285
E 286 def _calculate_metrics(self):
E
E ~/notebooks/repos/recommenders/recommenders/evaluation/spark_evaluation.py in _calculate_metrics(self)
E 298 ).drop(self.col_user)
E 299
E --> 300 return RankingMetrics(self._items_for_user_all.rdd)
E 301
E 302 def precision_at_k(self):
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/pyspark/sql/dataframe.py in rdd(self)
E 84 """
E 85 if self._lazy_rdd is None:
E ---> 86 jrdd = self._jdf.javaToPython()
E 87 self._lazy_rdd = RDD(jrdd, self.sql_ctx._sc, BatchedSerializer(PickleSerializer()))
E 88 return self._lazy_rdd
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/py4j/java_gateway.py in __call__(self, *args)
E 1320 answer = self.gateway_client.send_command(command)
E 1321 return_value = get_return_value(
E -> 1322 answer, self.gateway_client, self.target_id, self.name)
E 1323
E 1324 for temp_arg in temp_args:
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
E 109 def deco(*a, **kw):
E 110 try:
E --> 111 return f(*a, **kw)
E 112 except py4j.protocol.Py4JJavaError as e:
E 113 converted = convert_exception(e.java_exception)
E
E /anaconda/envs/reco_spark/lib/python3.7/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
E 326 raise Py4JJavaError(
E 327 "An error occurred while calling {0}{1}{2}.\n".
E --> 328 format(target_id, ".", name), value)
E 329 else:
E 330 raise Py4JError(
E
E Py4JJavaError: An error occurred while calling o361.javaToPython.
E : java.lang.StackOverflowError
E at java.base/java.io.ObjectOutputStream$ReplaceTable.lookup(ObjectOutputStream.java:2395)
E at java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1104)
E at java.base/java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1543)
E at java.base/java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1500)
E at java.base/java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1423)
E at java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1169)
E at java.base/java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1543)
E at java.base/java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1500)
E at java.base/java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1423)
E at java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1169)
E at java.base/java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:345)
E at scala.collection.immutable.List$SerializationProxy.writeObject(List.scala:477)
E at jdk.internal.reflect.GeneratedMethodAccessor9.invoke(Unknown Source)
E at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E at java.base/java.lang.reflect.Method.invoke(Method.java:566)
.
.
.
E at java.base/java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1500)
E at java.base/java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1423)
E at java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1169)
E at java.base/java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:345)
E at scala.collection.immutable.List$SerializationProxy.writeObject(List.scala:477)
E at jdk.internal.reflect.GeneratedMethodAccessor9.invoke(Unknown Source)
E at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E at java.base/java.lang.reflect.Method.invoke(Method.java:566)
/anaconda/envs/reco_spark/lib/python3.7/site-packages/papermill/execute.py:232: PapermillExecutionError
----------------------------------------------------------------------------------------------- Captured stdout call ------------------------------------------------------------------------------------------------
{"@version":1,"source_host":"migonzareco","message":"Unable to load native-hadoop library for your platform... using builtin-java classes where applicable","thread_name":"main","@timestamp":"2022-10-26T09:39:01.064+0000","level":"WARN","logger_name":"org.apache.hadoop.util.NativeCodeLoader"}
{"mdc":{"mdc.taskName":"task 0.0 in stage 9.0 (TID 39)"},"@version":1,"source_host":"migonzareco","message":"Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS","thread_name":"Executor task launch worker for task 0.0 in stage 9.0 (TID 39)","@timestamp":"2022-10-26T09:39:15.039+0000","level":"WARN","logger_name":"com.github.fommil.netlib.BLAS"}
{"mdc":{"mdc.taskName":"task 0.0 in stage 9.0 (TID 39)"},"@version":1,"source_host":"migonzareco","message":"Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS","thread_name":"Executor task launch worker for task 0.0 in stage 9.0 (TID 39)","@timestamp":"2022-10-26T09:39:15.042+0000","level":"WARN","logger_name":"com.github.fommil.netlib.BLAS"}
{"mdc":{"mdc.taskName":"task 2.0 in stage 11.0 (TID 51)"},"@version":1,"source_host":"migonzareco","message":"Failed to load implementation from: com.github.fommil.netlib.NativeSystemLAPACK","thread_name":"Executor task launch worker for task 2.0 in stage 11.0 (TID 51)","@timestamp":"2022-10-26T09:39:15.187+0000","level":"WARN","logger_name":"com.github.fommil.netlib.LAPACK"}
{"mdc":{"mdc.taskName":"task 2.0 in stage 11.0 (TID 51)"},"@version":1,"source_host":"migonzareco","message":"Failed to load implementation from: com.github.fommil.netlib.NativeRefLAPACK","thread_name":"Executor task launch worker for task 2.0 in stage 11.0 (TID 51)","@timestamp":"2022-10-26T09:39:15.187+0000","level":"WARN","logger_name":"com.github.fommil.netlib.LAPACK"}
{"@version":1,"source_host":"migonzareco","message":"Constructing trivially true equals predicate, 'userID#225 = userID#225'. Perhaps you need to use aliases.","thread_name":"Thread-4","@timestamp":"2022-10-26T09:39:39.000+0000","level":"WARN","logger_name":"org.apache.spark.sql.Column"}
{"@version":1,"source_host":"migonzareco","message":"Constructing trivially true equals predicate, 'itemID#226 = itemID#226'. Perhaps you need to use aliases.","thread_name":"Thread-4","@timestamp":"2022-10-26T09:39:39.002+0000","level":"WARN","logger_name":"org.apache.spark.sql.Column"}
{"exception":{"exception_class":"java.io.IOException","exception_message":"Failed to delete: /home/hoaphumanoid/.spark/scratch/blockmgr-ca49033b-82f1-400f-a181-5f03db1de1d5","stacktrace":"java.io.IOException: Failed to delete: /home/hoaphumanoid/.spark/scratch/blockmgr-ca49033b-82f1-400f-a181-5f03db1de1d5\n\tat org.apache.spark.network.util.JavaUtils.deleteRecursivelyUsingUnixNative(JavaUtils.java:171)\n\tat org.apache.spark.network.util.JavaUtils.deleteRecursively(JavaUtils.java:110)\n\tat org.apache.spark.network.util.JavaUtils.deleteRecursively(JavaUtils.java:91)\n\tat org.apache.spark.util.Utils$.deleteRecursively(Utils.scala:1141)\n\tat org.apache.spark.storage.DiskBlockManager.$anonfun$doStop$1(DiskBlockManager.scala:182)\n\tat org.apache.spark.storage.DiskBlockManager.$anonfun$doStop$1$adapted(DiskBlockManager.scala:178)\n\tat scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)\n\tat scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)\n\tat scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)\n\tat org.apache.spark.storage.DiskBlockManager.doStop(DiskBlockManager.scala:178)\n\tat org.apache.spark.storage.DiskBlockManager.stop(DiskBlockManager.scala:173)\n\tat org.apache.spark.storage.BlockManager.stop(BlockManager.scala:1931)\n\tat org.apache.spark.SparkEnv.stop(SparkEnv.scala:92)\n\tat org.apache.spark.SparkContext.$anonfun$stop$23(SparkContext.scala:2108)\n\tat org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1419)\n\tat org.apache.spark.SparkContext.stop(SparkContext.scala:2108)\n\tat org.apache.spark.SparkContext.$anonfun$new$37(SparkContext.scala:661)\n\tat org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:214)\n\tat org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$2(ShutdownHookManager.scala:188)\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n\tat 
org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1996)\n\tat org.apache.spark.util.SparkShutdownHookManager.$anonfun$runAll$1(ShutdownHookManager.scala:188)\n\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n\tat scala.util.Try$.apply(Try.scala:213)\n\tat org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)\n\tat org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)\n\tat org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54)\n"},"@version":1,"source_host":"migonzareco","message":"Attempt to delete using native Unix OS command failed for path = /home/hoaphumanoid/.spark/scratch/blockmgr-ca49033b-82f1-400f-a181-5f03db1de1d5. Falling back to Java IO way","thread_name":"Thread-0","@timestamp":"2022-10-26T09:39:41.051+0000","level":"WARN","logger_name":"org.apache.spark.network.util.JavaUtils"}
----------------------------------------------------------------------------------------------- Captured stderr call ------------------------------------------------------------------------------------------------
Executing: 21%|██ | 5/24 [00:04<00:14, 1.28cell/s]WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/dsvm/tools/spark/spark-3.1.2/jars/spark-unsafe_2.12-3.1.2.jar) to constructor java.nio.DirectByteBuffer(long,int)
WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
Executing: 100%|██████████| 24/24 [00:49<00:00, 2.05s/cell]
Flaky behavior, but it works. I have seen this behavior also in the notebook.
Error in LightGCN:
$ pytest tests/integration/examples/test_notebooks_gpu.py::test_benchmark_movielens_gpu --disable-warnings
================================================================================================ test session starts ================================================================================================
platform linux -- Python 3.7.13, pytest-7.2.0, pluggy-1.0.0
rootdir: /home/hoaphumanoid/notebooks/repos/recommenders
plugins: cov-4.0.0, mock-3.10.0, rerunfailures-10.2, anyio-3.6.2, hypothesis-6.56.3
collected 1 item
tests/integration/examples/test_notebooks_gpu.py F [100%]
===================================================================================================== FAILURES ======================================================================================================
____________________________________________________________________________ test_benchmark_movielens_gpu[size0-algos0-expected_values0] ____________________________________________________________________________
notebooks = {'als_deep_dive': '/home/hoaphumanoid/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/als_deep_..., 'benchmark_movielens': '/home/hoaphumanoid/notebooks/repos/recommenders/examples/06_benchmarks/movielens.ipynb', ...}
output_notebook = 'output.ipynb', kernel_name = 'python3', size = ['100k'], algos = ['ncf', 'fastai', 'bivae', 'lightgcn']
expected_values = {'eval_precision': 0.131601, 'eval_precision2': 0.145599, 'eval_recall': 0.038056, 'eval_recall2': 0.051338}
@pytest.mark.gpu
@pytest.mark.notebooks
@pytest.mark.integration
@pytest.mark.parametrize(
"size, algos, expected_values",
[
(
["100k"],
["ncf", "fastai", "bivae", "lightgcn"],
dict(
eval_precision=0.131601,
eval_recall=0.038056,
eval_precision2=0.145599,
eval_recall2=0.051338,
),
),
],
)
def test_benchmark_movielens_gpu(notebooks, output_notebook, kernel_name, size, algos, expected_values):
notebook_path = notebooks["benchmark_movielens"]
pm.execute_notebook(
notebook_path,
output_notebook,
kernel_name=kernel_name,
> parameters=dict(data_sizes=size, algorithms=algos),
)
tests/integration/examples/test_notebooks_gpu.py:752:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/anaconda/envs/reco_gpu/lib/python3.7/site-packages/papermill/execute.py:128: in execute_notebook
raise_for_execution_errors(nb, output_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
nb = {'cells': [{'id': '5a4f5450', 'cell_type': 'markdown', 'source': '<span style="color:red; font-family:Helvetica Neue, ...d_time': '2022-10-27T16:21:06.373962', 'duration': 287.705098, 'exception': None}}, 'nbformat': 4, 'nbformat_minor': 5}
output_path = 'output.ipynb'
def raise_for_execution_errors(nb, output_path):
"""Assigned parameters into the appropriate place in the input notebook
Parameters
----------
nb : NotebookNode
Executable notebook object
output_path : str
Path to write executed notebook
"""
error = None
for index, cell in enumerate(nb.cells):
if cell.get("outputs") is None:
continue
for output in cell.outputs:
if output.output_type == "error":
if output.ename == "SystemExit" and (output.evalue == "" or output.evalue == "0"):
continue
error = PapermillExecutionError(
cell_index=index,
exec_count=cell.execution_count,
source=cell.source,
ename=output.ename,
evalue=output.evalue,
traceback=output.traceback,
)
break
if error:
# Write notebook back out with the Error Message at the top of the Notebook, and a link to
# the relevant cell (by adding a note just before the failure with an HTML anchor)
error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count)
error_msg_cell = nbformat.v4.new_markdown_cell(error_msg)
error_msg_cell.metadata['tags'] = [ERROR_MARKER_TAG]
error_anchor_cell = nbformat.v4.new_markdown_cell(ERROR_ANCHOR_MSG)
error_anchor_cell.metadata['tags'] = [ERROR_MARKER_TAG]
# put the anchor before the cell with the error, before all the indices change due to the
# heading-prepending
nb.cells.insert(error.cell_index, error_anchor_cell)
nb.cells.insert(0, error_msg_cell)
write_ipynb(nb, output_path)
> raise error
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [16]":
E ---------------------------------------------------------------------------
E FileNotFoundError Traceback (most recent call last)
E <timed exec> in <module>
E
E /tmp/ipykernel_21879/1119464800.py in <lambda>(params, data)
E 7 "bpr": lambda params, data: train_bpr(params, data),
E 8 "bivae": lambda params, data: train_bivae(params, data),
E ----> 9 "lightgcn": lambda params, data: train_lightgcn(params, data),
E 10 }
E
E ~/notebooks/repos/recommenders/examples/06_benchmarks/benchmark_utils.py in train_lightgcn(params, data)
E 356
E 357 def train_lightgcn(params, data):
E --> 358 hparams = prepare_hparams(**params)
E 359 model = LightGCN(hparams, data)
E 360 with Timer() as t:
E
E ~/notebooks/repos/recommenders/recommenders/models/deeprec/deeprec_utils.py in prepare_hparams(yaml_file, **kwargs)
E 415 """
E 416 if yaml_file is not None:
E --> 417 config = load_yaml(yaml_file)
E 418 config = flat_config(config)
E 419 else:
E
E ~/notebooks/repos/recommenders/recommenders/models/deeprec/deeprec_utils.py in load_yaml(filename)
E 294 """
E 295 try:
E --> 296 with open(filename, "r") as f:
E 297 config = yaml.load(f, yaml.SafeLoader)
E 298 return config
E
E FileNotFoundError: [Errno 2] No such file or directory: '../../recommenders/models/deeprec/config/lightgcn.yaml'
/anaconda/envs/reco_gpu/lib/python3.7/site-packages/papermill/execute.py:232: PapermillExecutionError
Fixed by passing the parameters directly instead of loading them from the YAML file.
Error in LightGCN:
$ pytest tests/integration/examples/test_notebooks_gpu.py::test_benchmark_movielens_gpu --disable-warnings
================================================================================================ test session starts ================================================================================================
platform linux -- Python 3.7.13, pytest-7.2.0, pluggy-1.0.0
rootdir: /home/hoaphumanoid/notebooks/repos/recommenders
plugins: cov-4.0.0, mock-3.10.0, rerunfailures-10.2, anyio-3.6.2, hypothesis-6.56.3
collected 1 item
tests/integration/examples/test_notebooks_gpu.py F [100%]
===================================================================================================== FAILURES ======================================================================================================
____________________________________________________________________________ test_benchmark_movielens_gpu[size0-algos0-expected_values0] ____________________________________________________________________________
notebooks = {'als_deep_dive': '/home/hoaphumanoid/notebooks/repos/recommenders/examples/02_model_collaborative_filtering/als_deep_..., 'benchmark_movielens': '/home/hoaphumanoid/notebooks/repos/recommenders/examples/06_benchmarks/movielens.ipynb', ...}
output_notebook = 'output.ipynb', kernel_name = 'python3', size = ['100k'], algos = ['ncf', 'fastai', 'bivae', 'lightgcn']
expected_values = {'eval_precision': 0.131601, 'eval_precision2': 0.145599, 'eval_recall': 0.038056, 'eval_recall2': 0.051338}
@pytest.mark.gpu
@pytest.mark.notebooks
@pytest.mark.integration
@pytest.mark.parametrize(
"size, algos, expected_values",
[
(
["100k"],
["ncf", "fastai", "bivae", "lightgcn"],
dict(
eval_precision=0.131601,
eval_recall=0.038056,
eval_precision2=0.145599,
eval_recall2=0.051338,
),
),
],
)
def test_benchmark_movielens_gpu(notebooks, output_notebook, kernel_name, size, algos, expected_values):
notebook_path = notebooks["benchmark_movielens"]
pm.execute_notebook(
notebook_path,
output_notebook,
kernel_name=kernel_name,
> parameters=dict(data_sizes=size, algorithms=algos),
)
tests/integration/examples/test_notebooks_gpu.py:752:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
/anaconda/envs/reco_gpu/lib/python3.7/site-packages/papermill/execute.py:128: in execute_notebook
raise_for_execution_errors(nb, output_path)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
nb = {'cells': [{'id': 'c4e76390', 'cell_type': 'markdown', 'source': '<span style="color:red; font-family:Helvetica Neue, ...d_time': '2022-10-27T19:45:57.138133', 'duration': 192.464262, 'exception': None}}, 'nbformat': 4, 'nbformat_minor': 5}
output_path = 'output.ipynb'
def raise_for_execution_errors(nb, output_path):
"""Assigned parameters into the appropriate place in the input notebook
Parameters
----------
nb : NotebookNode
Executable notebook object
output_path : str
Path to write executed notebook
"""
error = None
for index, cell in enumerate(nb.cells):
if cell.get("outputs") is None:
continue
for output in cell.outputs:
if output.output_type == "error":
if output.ename == "SystemExit" and (output.evalue == "" or output.evalue == "0"):
continue
error = PapermillExecutionError(
cell_index=index,
exec_count=cell.execution_count,
source=cell.source,
ename=output.ename,
evalue=output.evalue,
traceback=output.traceback,
)
break
if error:
# Write notebook back out with the Error Message at the top of the Notebook, and a link to
# the relevant cell (by adding a note just before the failure with an HTML anchor)
error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count)
error_msg_cell = nbformat.v4.new_markdown_cell(error_msg)
error_msg_cell.metadata['tags'] = [ERROR_MARKER_TAG]
error_anchor_cell = nbformat.v4.new_markdown_cell(ERROR_ANCHOR_MSG)
error_anchor_cell.metadata['tags'] = [ERROR_MARKER_TAG]
# put the anchor before the cell with the error, before all the indices change due to the
# heading-prepending
nb.cells.insert(error.cell_index, error_anchor_cell)
nb.cells.insert(0, error_msg_cell)
write_ipynb(nb, output_path)
> raise error
E papermill.exceptions.PapermillExecutionError:
E ---------------------------------------------------------------------------
E Exception encountered at "In [16]":
E ---------------------------------------------------------------------------
E ValueError Traceback (most recent call last)
E <timed exec> in <module>
E
E /tmp/ipykernel_15461/1119464800.py in <lambda>(params, data)
E 7 "bpr": lambda params, data: train_bpr(params, data),
E 8 "bivae": lambda params, data: train_bivae(params, data),
E ----> 9 "lightgcn": lambda params, data: train_lightgcn(params, data),
E 10 }
E
E ~/notebooks/repos/recommenders/examples/06_benchmarks/benchmark_utils.py in train_lightgcn(params, data)
E 357 model = LightGCN(hparams, data)
E 358 with Timer() as t:
E --> 359 model.fit()
E 360 return model, t
E 361
E
E ~/notebooks/repos/recommenders/recommenders/models/deeprec/models/graphrec/lightgcn.py in fit(self)
E 251 else:
E 252 eval_start = time.time()
E --> 253 ret = self.run_eval()
E 254 eval_end = time.time()
E 255 eval_time = eval_end - eval_start
E
E ~/notebooks/repos/recommenders/recommenders/models/deeprec/models/graphrec/lightgcn.py in run_eval(self)
E 320 ret.append(
E 321 recall_at_k(
E --> 322 self.data.test, topk_scores, relevancy_method=None, k=self.top_k
E 323 )
E 324 )
E
E ~/notebooks/repos/recommenders/recommenders/evaluation/python_evaluation.py in recall_at_k(rating_true, rating_pred, col_user, col_item, col_prediction, relevancy_method, k, threshold, **kwargs)
E 499 k items exist for a user in rating_true.
E 500 """
E --> 501 col_rating = _get_rating_column(relevancy_method, **kwargs)
E 502 df_hit, df_hit_count, n_users = merge_ranking_true_pred(
E 503 rating_true=rating_true,
E
E ~/notebooks/repos/recommenders/recommenders/evaluation/python_evaluation.py in _get_rating_column(relevancy_method, **kwargs)
E 750 if relevancy_method != "top_k":
E 751 if "col_rating" not in kwargs:
E --> 752 raise ValueError("Expected an argument `col_rating` but wasn't found.")
E 753 col_rating = kwargs.get("col_rating")
E 754 else:
E
E ValueError: Expected an argument `col_rating` but wasn't found.
Fixed in https://github.com/microsoft/recommenders/commit/4ffbb16b02f13574c312315829e2aa10cd423821