systems
systems copied to clipboard
DAG (wrapper) operator for HugeCTR serving support
This PR addresses concerns around HugeCTR support. It lays the foundation for supporting a HugeCTR model in Merlin Systems by creating the operator that will house a HugeCTR model and allow it to be used in the Merlin graph for inference operations.
Click to view CI Results
GitHub pull request #125 of commit 8986c8a491173b051732e8d54adbbcd04cca1454, no merge conflicts. Running as SYSTEM Setting status of 8986c8a491173b051732e8d54adbbcd04cca1454 to PENDING with url https://10.20.13.93:8080/job/merlin_systems/100/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 8986c8a491173b051732e8d54adbbcd04cca1454^{commit} # timeout=10 Checking out Revision 8986c8a491173b051732e8d54adbbcd04cca1454 (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 8986c8a491173b051732e8d54adbbcd04cca1454 # timeout=10 Commit message: "add foundation of hugectr op" > git rev-list --no-walk fc4e464729df3bd367bb990310b5f2119af35a46 # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins10386077036665179152.sh ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 18 items / 2 skippedtests/unit/test_version.py . [ 5%] tests/unit/systems/test_ensemble.py ... [ 22%] tests/unit/systems/test_ensemble_ops.py .. [ 33%] tests/unit/systems/test_export.py . [ 38%] tests/unit/systems/test_graph.py . [ 44%] tests/unit/systems/test_inference_ops.py .. 
[ 55%] tests/unit/systems/test_op_runner.py .... [ 77%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 94%] tests/unit/systems/hugectr/test_hugectr.py F [100%]
=================================== FAILURES =================================== ________________________________ test_training _________________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-0/test_training0')
def test_training(tmpdir): # Download & Convert data download_file( "http://files.grouplens.org/datasets/movielens/ml-25m.zip", os.path.join(DATA_DIR, "ml-25m.zip"), ) ratings = cudf.read_csv(os.path.join(DATA_DIR, "ml-25m", "ratings.csv")) ratings["new_cat1"] = ratings["userId"] / ratings["movieId"] ratings["new_cat1"] = ratings["new_cat1"].astype("int64") ratings.head() ratings = ratings.drop("timestamp", axis=1) train, valid = train_test_split(ratings, test_size=0.2, random_state=42) train.to_parquet(DATA_DIR + "train.parquet") valid.to_parquet(DATA_DIR + "valid.parquet") del train del valid gc.collect() # Perform ETL with NVTabular cat_features = CATEGORICAL_COLUMNS >> nvt.ops.Categorify(cat_cache="device") ratings = nvt.ColumnSelector(["rating"]) >> nvt.ops.LambdaOp( lambda col: (col > 3).astype("int8") ) output = cat_features + ratings workflow = nvt.Workflow(output) train_dataset = nvt.Dataset(DATA_DIR + "train.parquet", part_size="100MB") valid_dataset = nvt.Dataset(DATA_DIR + "valid.parquet", part_size="100MB") workflow.fit(train_dataset) dict_dtypes = {} for col in CATEGORICAL_COLUMNS: dict_dtypes[col] = np.int64 for col in LABEL_COLUMNS: dict_dtypes[col] = np.float32 if path.exists(DATA_DIR + "train"): shutil.rmtree(os.path.join(DATA_DIR, "train")) if path.exists(DATA_DIR + "valid"): shutil.rmtree(os.path.join(DATA_DIR, "valid")) workflow.transform(train_dataset).to_parquet( output_path=DATA_DIR + "train/", shuffle=nvt.io.Shuffle.PER_PARTITION, cats=CATEGORICAL_COLUMNS, labels=LABEL_COLUMNS, dtypes=dict_dtypes, ) workflow.transform(valid_dataset).to_parquet( output_path=DATA_DIR + "valid/", shuffle=False, cats=CATEGORICAL_COLUMNS, labels=LABEL_COLUMNS, dtypes=dict_dtypes, ) # Train with HugeCTR embeddings = get_embedding_sizes(workflow) total_cardinality = 0 slot_sizes = [] for column in CATEGORICAL_COLUMNS: slot_sizes.append(embeddings[column][0]) total_cardinality += embeddings[column][0] test_data_path = DATA_DIR + "test/" if 
path.exists(test_data_path): shutil.rmtree(test_data_path) os.mkdir(test_data_path) if path.exists(MODEL_DIR): shutil.rmtree(MODEL_DIR) os.makedirs(TRAIN_DIR) sample_data = cudf.read_parquet(DATA_DIR + "valid.parquet", num_rows=TEST_N_ROWS) sample_data.to_csv(test_data_path + "data.csv") sample_data_trans = nvt.workflow.workflow._transform_partition( sample_data, [workflow.output_node] ) dense_features, embedding_columns, row_ptrs = _convert(sample_data_trans, slot_sizes)
model = _run_model(slot_sizes, total_cardinality)
tests/unit/systems/hugectr/test_hugectr.py:306:
slot_sizes = [162542, 56586, 76204], total_cardinality = 295332
def _run_model(slot_sizes, total_cardinality):
solver = hugectr.CreateSolver(
vvgpu=[[0]], batchsize=2048, batchsize_eval=2048, max_eval_batches=160, i64_input_key=True, use_mixed_precision=False, repeat_dataset=True, )
E AttributeError: 'NoneType' object has no attribute 'CreateSolver'
tests/unit/systems/hugectr/test_hugectr.py:65: AttributeError ----------------------------- Captured stderr call -----------------------------
downloading ml-25m.zip: 0.00B [00:00, ?B/s] downloading ml-25m.zip: 0%| | 0.00/262M [00:00<?, ?B/s] downloading ml-25m.zip: 0%| | 57.3k/262M [00:00<10:11, 428kB/s] downloading ml-25m.zip: 0%| | 197k/262M [00:00<05:06, 854kB/s] downloading ml-25m.zip: 0%| | 410k/262M [00:00<03:22, 1.29MB/s] downloading ml-25m.zip: 0%| | 647k/262M [00:00<02:45, 1.58MB/s] downloading ml-25m.zip: 0%| | 893k/262M [00:00<02:31, 1.72MB/s] downloading ml-25m.zip: 0%| | 1.16M/262M [00:00<02:16, 1.90MB/s] downloading ml-25m.zip: 1%| | 1.45M/262M [00:00<02:03, 2.11MB/s] downloading ml-25m.zip: 1%| | 1.77M/262M [00:00<01:53, 2.28MB/s] downloading ml-25m.zip: 1%| | 2.11M/262M [00:01<01:44, 2.48MB/s] downloading ml-25m.zip: 1%| | 2.47M/262M [00:01<01:36, 2.68MB/s] downloading ml-25m.zip: 1%| | 2.87M/262M [00:01<01:28, 2.93MB/s] downloading ml-25m.zip: 1%|▏ | 3.28M/262M [00:01<01:24, 3.07MB/s] downloading ml-25m.zip: 1%|▏ | 3.74M/262M [00:01<01:18, 3.29MB/s] downloading ml-25m.zip: 2%|▏ | 4.24M/262M [00:01<01:12, 3.57MB/s] downloading ml-25m.zip: 2%|▏ | 4.77M/262M [00:01<01:06, 3.89MB/s] downloading ml-25m.zip: 2%|▏ | 5.33M/262M [00:01<01:01, 4.20MB/s] downloading ml-25m.zip: 2%|▏ | 5.92M/262M [00:02<00:57, 4.49MB/s] downloading ml-25m.zip: 2%|▏ | 6.47M/262M [00:02<00:53, 4.75MB/s] downloading ml-25m.zip: 3%|▎ | 6.96M/262M [00:02<00:55, 4.62MB/s] downloading ml-25m.zip: 3%|▎ | 7.42M/262M [00:02<00:55, 4.63MB/s] downloading ml-25m.zip: 3%|▎ | 7.99M/262M [00:02<00:53, 4.73MB/s] downloading ml-25m.zip: 3%|▎ | 8.58M/262M [00:02<00:50, 5.04MB/s] downloading ml-25m.zip: 3%|▎ | 9.08M/262M [00:02<00:52, 4.85MB/s] downloading ml-25m.zip: 4%|▎ | 9.58M/262M [00:02<00:55, 4.52MB/s] downloading ml-25m.zip: 4%|▍ | 10.1M/262M [00:02<00:53, 4.74MB/s] downloading ml-25m.zip: 4%|▍ | 10.7M/262M [00:02<00:50, 4.95MB/s] downloading ml-25m.zip: 4%|▍ | 11.2M/262M [00:03<00:53, 4.69MB/s] downloading ml-25m.zip: 4%|▍ | 11.7M/262M [00:03<00:52, 4.80MB/s] downloading ml-25m.zip: 5%|▍ | 12.3M/262M [00:03<00:49, 5.03MB/s] 
downloading ml-25m.zip: 5%|▍ | 12.8M/262M [00:03<00:49, 5.03MB/s] downloading ml-25m.zip: 5%|▌ | 13.3M/262M [00:03<00:55, 4.50MB/s] downloading ml-25m.zip: 5%|▌ | 13.8M/262M [00:03<00:59, 4.14MB/s] downloading ml-25m.zip: 5%|▌ | 14.2M/262M [00:03<00:57, 4.27MB/s] downloading ml-25m.zip: 6%|▌ | 14.7M/262M [00:03<00:58, 4.21MB/s] downloading ml-25m.zip: 6%|▌ | 15.1M/262M [00:04<01:02, 3.95MB/s] downloading ml-25m.zip: 6%|▌ | 15.5M/262M [00:04<01:05, 3.79MB/s] downloading ml-25m.zip: 6%|▌ | 15.9M/262M [00:04<01:04, 3.83MB/s] downloading ml-25m.zip: 6%|▋ | 16.4M/262M [00:04<01:02, 3.95MB/s] downloading ml-25m.zip: 6%|▋ | 16.9M/262M [00:04<01:00, 4.05MB/s] downloading ml-25m.zip: 7%|▋ | 17.4M/262M [00:04<00:58, 4.18MB/s] downloading ml-25m.zip: 7%|▋ | 17.9M/262M [00:04<00:56, 4.31MB/s] downloading ml-25m.zip: 7%|▋ | 18.4M/262M [00:04<00:55, 4.38MB/s] downloading ml-25m.zip: 7%|▋ | 19.0M/262M [00:04<00:54, 4.48MB/s] downloading ml-25m.zip: 7%|▋ | 19.5M/262M [00:05<00:52, 4.64MB/s] downloading ml-25m.zip: 8%|▊ | 19.9M/262M [00:05<00:53, 4.54MB/s] downloading ml-25m.zip: 8%|▊ | 20.4M/262M [00:05<00:52, 4.57MB/s] downloading ml-25m.zip: 8%|▊ | 20.9M/262M [00:05<00:51, 4.64MB/s] downloading ml-25m.zip: 8%|▊ | 21.4M/262M [00:05<00:50, 4.75MB/s] downloading ml-25m.zip: 8%|▊ | 21.9M/262M [00:05<00:50, 4.73MB/s] downloading ml-25m.zip: 9%|▊ | 22.4M/262M [00:05<00:50, 4.70MB/s] downloading ml-25m.zip: 9%|▉ | 22.9M/262M [00:05<00:49, 4.80MB/s] downloading ml-25m.zip: 9%|▉ | 23.5M/262M [00:05<00:48, 4.89MB/s] downloading ml-25m.zip: 9%|▉ | 24.1M/262M [00:05<00:46, 5.13MB/s] downloading ml-25m.zip: 9%|▉ | 24.6M/262M [00:06<00:45, 5.17MB/s] downloading ml-25m.zip: 10%|▉ | 25.2M/262M [00:06<00:46, 5.15MB/s] downloading ml-25m.zip: 10%|▉ | 25.7M/262M [00:06<00:47, 5.00MB/s] downloading ml-25m.zip: 10%|█ | 26.2M/262M [00:06<00:47, 5.00MB/s] downloading ml-25m.zip: 10%|█ | 26.8M/262M [00:06<00:45, 5.14MB/s] downloading ml-25m.zip: 10%|█ | 27.3M/262M [00:06<00:45, 5.15MB/s] downloading 
ml-25m.zip: 11%|█ | 27.9M/262M [00:06<00:44, 5.29MB/s] downloading ml-25m.zip: 11%|█ | 28.5M/262M [00:06<00:43, 5.32MB/s] downloading ml-25m.zip: 11%|█ | 29.0M/262M [00:06<00:45, 5.10MB/s] downloading ml-25m.zip: 11%|█▏ | 29.5M/262M [00:07<00:45, 5.07MB/s] downloading ml-25m.zip: 11%|█▏ | 30.1M/262M [00:07<00:44, 5.16MB/s] downloading ml-25m.zip: 12%|█▏ | 30.6M/262M [00:07<00:46, 5.00MB/s] downloading ml-25m.zip: 12%|█▏ | 31.1M/262M [00:07<00:47, 4.90MB/s] downloading ml-25m.zip: 12%|█▏ | 31.6M/262M [00:07<00:46, 4.96MB/s] downloading ml-25m.zip: 12%|█▏ | 32.1M/262M [00:07<00:47, 4.88MB/s] downloading ml-25m.zip: 12%|█▏ | 32.6M/262M [00:07<00:46, 4.90MB/s] downloading ml-25m.zip: 13%|█▎ | 33.2M/262M [00:07<00:45, 4.99MB/s] downloading ml-25m.zip: 13%|█▎ | 33.9M/262M [00:07<00:44, 5.10MB/s] downloading ml-25m.zip: 13%|█▎ | 34.5M/262M [00:08<00:43, 5.21MB/s] downloading ml-25m.zip: 13%|█▎ | 35.1M/262M [00:08<00:42, 5.38MB/s] downloading ml-25m.zip: 14%|█▎ | 35.6M/262M [00:08<00:43, 5.24MB/s] downloading ml-25m.zip: 14%|█▍ | 36.2M/262M [00:08<00:45, 4.95MB/s] downloading ml-25m.zip: 14%|█▍ | 36.7M/262M [00:08<00:45, 4.98MB/s] downloading ml-25m.zip: 14%|█▍ | 37.3M/262M [00:08<00:42, 5.25MB/s] downloading ml-25m.zip: 14%|█▍ | 37.9M/262M [00:08<00:42, 5.28MB/s] downloading ml-25m.zip: 15%|█▍ | 38.4M/262M [00:08<00:42, 5.24MB/s] downloading ml-25m.zip: 15%|█▍ | 39.0M/262M [00:08<00:41, 5.34MB/s] downloading ml-25m.zip: 15%|█▌ | 39.5M/262M [00:08<00:42, 5.23MB/s] downloading ml-25m.zip: 15%|█▌ | 40.1M/262M [00:09<00:42, 5.22MB/s] downloading ml-25m.zip: 16%|█▌ | 40.8M/262M [00:09<00:41, 5.32MB/s] downloading ml-25m.zip: 16%|█▌ | 41.4M/262M [00:09<00:41, 5.36MB/s] downloading ml-25m.zip: 16%|█▌ | 42.0M/262M [00:09<00:40, 5.38MB/s] downloading ml-25m.zip: 16%|█▋ | 42.6M/262M [00:09<00:40, 5.41MB/s] downloading ml-25m.zip: 17%|█▋ | 43.3M/262M [00:09<00:40, 5.44MB/s] downloading ml-25m.zip: 17%|█▋ | 43.8M/262M [00:09<00:39, 5.46MB/s] downloading ml-25m.zip: 17%|█▋ | 
44.4M/262M [00:09<00:40, 5.38MB/s] downloading ml-25m.zip: 17%|█▋ | 44.9M/262M [00:09<00:40, 5.35MB/s] downloading ml-25m.zip: 17%|█▋ | 45.5M/262M [00:10<00:40, 5.32MB/s] downloading ml-25m.zip: 18%|█▊ | 46.1M/262M [00:10<00:40, 5.36MB/s] downloading ml-25m.zip: 18%|█▊ | 46.7M/262M [00:10<00:38, 5.53MB/s] downloading ml-25m.zip: 18%|█▊ | 47.3M/262M [00:10<00:38, 5.53MB/s] downloading ml-25m.zip: 18%|█▊ | 47.9M/262M [00:10<00:39, 5.37MB/s] downloading ml-25m.zip: 18%|█▊ | 48.4M/262M [00:10<00:40, 5.29MB/s] downloading ml-25m.zip: 19%|█▊ | 49.0M/262M [00:10<00:38, 5.49MB/s] downloading ml-25m.zip: 19%|█▉ | 49.6M/262M [00:10<00:38, 5.48MB/s] downloading ml-25m.zip: 19%|█▉ | 50.2M/262M [00:10<00:38, 5.53MB/s] downloading ml-25m.zip: 19%|█▉ | 50.7M/262M [00:11<00:38, 5.47MB/s] downloading ml-25m.zip: 20%|█▉ | 51.3M/262M [00:11<00:39, 5.39MB/s] downloading ml-25m.zip: 20%|█▉ | 51.9M/262M [00:11<00:39, 5.37MB/s] downloading ml-25m.zip: 20%|██ | 52.5M/262M [00:11<00:37, 5.56MB/s] downloading ml-25m.zip: 20%|██ | 53.0M/262M [00:11<00:37, 5.54MB/s] downloading ml-25m.zip: 20%|██ | 53.6M/262M [00:11<00:38, 5.39MB/s] downloading ml-25m.zip: 21%|██ | 54.1M/262M [00:11<00:38, 5.39MB/s] downloading ml-25m.zip: 21%|██ | 54.7M/262M [00:11<00:39, 5.31MB/s] downloading ml-25m.zip: 21%|██ | 55.4M/262M [00:11<00:38, 5.41MB/s] downloading ml-25m.zip: 21%|██▏ | 56.0M/262M [00:12<00:37, 5.50MB/s] downloading ml-25m.zip: 22%|██▏ | 56.7M/262M [00:12<00:37, 5.54MB/s] downloading ml-25m.zip: 22%|██▏ | 57.3M/262M [00:12<00:35, 5.69MB/s] downloading ml-25m.zip: 22%|██▏ | 57.9M/262M [00:12<00:36, 5.65MB/s] downloading ml-25m.zip: 22%|██▏ | 58.4M/262M [00:12<00:36, 5.63MB/s] downloading ml-25m.zip: 23%|██▎ | 59.0M/262M [00:12<00:36, 5.51MB/s] downloading ml-25m.zip: 23%|██▎ | 59.6M/262M [00:12<00:36, 5.61MB/s] downloading ml-25m.zip: 23%|██▎ | 60.2M/262M [00:12<00:35, 5.73MB/s] downloading ml-25m.zip: 23%|██▎ | 60.8M/262M [00:12<00:35, 5.65MB/s] downloading ml-25m.zip: 23%|██▎ | 61.3M/262M 
[00:12<00:35, 5.66MB/s] downloading ml-25m.zip: 24%|██▎ | 62.0M/262M [00:13<00:34, 5.72MB/s] downloading ml-25m.zip: 24%|██▍ | 62.6M/262M [00:13<00:33, 5.88MB/s] downloading ml-25m.zip: 24%|██▍ | 63.2M/262M [00:13<00:33, 5.88MB/s] downloading ml-25m.zip: 24%|██▍ | 63.8M/262M [00:13<00:33, 5.87MB/s] downloading ml-25m.zip: 25%|██▍ | 64.4M/262M [00:13<00:33, 5.86MB/s] downloading ml-25m.zip: 25%|██▍ | 65.0M/262M [00:13<00:33, 5.85MB/s] downloading ml-25m.zip: 25%|██▌ | 65.6M/262M [00:13<00:33, 5.86MB/s] downloading ml-25m.zip: 25%|██▌ | 66.2M/262M [00:13<00:34, 5.73MB/s] downloading ml-25m.zip: 25%|██▌ | 66.8M/262M [00:13<00:38, 5.13MB/s] downloading ml-25m.zip: 26%|██▌ | 67.3M/262M [00:14<00:41, 4.73MB/s] downloading ml-25m.zip: 26%|██▌ | 67.8M/262M [00:14<00:48, 4.00MB/s] downloading ml-25m.zip: 26%|██▌ | 68.2M/262M [00:14<00:52, 3.72MB/s] downloading ml-25m.zip: 26%|██▌ | 68.6M/262M [00:14<00:53, 3.61MB/s] downloading ml-25m.zip: 26%|██▋ | 69.0M/262M [00:14<00:54, 3.53MB/s] downloading ml-25m.zip: 26%|██▋ | 69.3M/262M [00:14<00:55, 3.49MB/s] downloading ml-25m.zip: 27%|██▋ | 69.7M/262M [00:14<00:54, 3.52MB/s] downloading ml-25m.zip: 27%|██▋ | 70.1M/262M [00:14<00:54, 3.50MB/s] downloading ml-25m.zip: 27%|██▋ | 70.4M/262M [00:15<00:53, 3.60MB/s] downloading ml-25m.zip: 27%|██▋ | 70.8M/262M [00:15<00:53, 3.59MB/s] downloading ml-25m.zip: 27%|██▋ | 71.2M/262M [00:15<00:53, 3.54MB/s] downloading ml-25m.zip: 27%|██▋ | 71.6M/262M [00:15<00:52, 3.62MB/s] downloading ml-25m.zip: 27%|██▋ | 72.0M/262M [00:15<00:52, 3.61MB/s] downloading ml-25m.zip: 28%|██▊ | 72.3M/262M [00:15<00:52, 3.62MB/s] downloading ml-25m.zip: 28%|██▊ | 72.7M/262M [00:15<00:51, 3.66MB/s] downloading ml-25m.zip: 28%|██▊ | 73.1M/262M [00:15<00:51, 3.66MB/s] downloading ml-25m.zip: 28%|██▊ | 73.4M/262M [00:15<00:51, 3.67MB/s] downloading ml-25m.zip: 28%|██▊ | 73.8M/262M [00:15<00:51, 3.66MB/s] downloading ml-25m.zip: 28%|██▊ | 74.2M/262M [00:16<00:51, 3.66MB/s] downloading ml-25m.zip: 28%|██▊ | 
74.6M/262M [00:16<00:51, 3.65MB/s] downloading ml-25m.zip: 29%|██▊ | 75.0M/262M [00:16<00:52, 3.58MB/s] downloading ml-25m.zip: 29%|██▉ | 75.3M/262M [00:16<00:52, 3.58MB/s] downloading ml-25m.zip: 29%|██▉ | 75.7M/262M [00:16<00:51, 3.59MB/s] downloading ml-25m.zip: 29%|██▉ | 76.1M/262M [00:16<00:51, 3.60MB/s] downloading ml-25m.zip: 29%|██▉ | 76.5M/262M [00:16<00:50, 3.66MB/s] downloading ml-25m.zip: 29%|██▉ | 76.9M/262M [00:16<00:49, 3.77MB/s] downloading ml-25m.zip: 29%|██▉ | 77.3M/262M [00:16<00:49, 3.71MB/s] downloading ml-25m.zip: 30%|██▉ | 77.7M/262M [00:16<00:49, 3.70MB/s] downloading ml-25m.zip: 30%|██▉ | 78.1M/262M [00:17<00:49, 3.73MB/s] downloading ml-25m.zip: 30%|██▉ | 78.5M/262M [00:17<00:50, 3.65MB/s] downloading ml-25m.zip: 30%|███ | 78.8M/262M [00:17<00:54, 3.39MB/s] downloading ml-25m.zip: 30%|███ | 79.2M/262M [00:17<00:55, 3.29MB/s] downloading ml-25m.zip: 30%|███ | 79.5M/262M [00:17<00:58, 3.10MB/s] downloading ml-25m.zip: 30%|███ | 79.8M/262M [00:17<01:01, 2.96MB/s] downloading ml-25m.zip: 31%|███ | 80.2M/262M [00:17<01:04, 2.81MB/s] downloading ml-25m.zip: 31%|███ | 80.4M/262M [00:17<01:05, 2.76MB/s] downloading ml-25m.zip: 31%|███ | 80.8M/262M [00:18<01:03, 2.86MB/s] downloading ml-25m.zip: 31%|███ | 81.1M/262M [00:18<01:01, 2.92MB/s] downloading ml-25m.zip: 31%|███ | 81.5M/262M [00:18<01:01, 2.93MB/s] downloading ml-25m.zip: 31%|███ | 81.8M/262M [00:18<01:16, 2.36MB/s] downloading ml-25m.zip: 31%|███▏ | 82.1M/262M [00:18<01:10, 2.55MB/s] downloading ml-25m.zip: 31%|███▏ | 82.4M/262M [00:18<01:13, 2.44MB/s] downloading ml-25m.zip: 32%|███▏ | 82.7M/262M [00:18<01:13, 2.45MB/s] downloading ml-25m.zip: 32%|███▏ | 82.9M/262M [00:18<01:13, 2.44MB/s] downloading ml-25m.zip: 32%|███▏ | 83.2M/262M [00:19<01:15, 2.36MB/s] downloading ml-25m.zip: 32%|███▏ | 83.4M/262M [00:19<01:15, 2.36MB/s] downloading ml-25m.zip: 32%|███▏ | 83.7M/262M [00:19<01:14, 2.40MB/s] downloading ml-25m.zip: 32%|███▏ | 84.0M/262M [00:19<01:12, 2.46MB/s] downloading ml-25m.zip: 
32%|███▏ | 84.3M/262M [00:19<01:11, 2.49MB/s] downloading ml-25m.zip: 32%|███▏ | 84.6M/262M [00:19<01:08, 2.57MB/s] downloading ml-25m.zip: 32%|███▏ | 84.9M/262M [00:19<01:07, 2.62MB/s] downloading ml-25m.zip: 33%|███▎ | 85.2M/262M [00:19<01:07, 2.64MB/s] downloading ml-25m.zip: 33%|███▎ | 85.5M/262M [00:19<01:06, 2.65MB/s] downloading ml-25m.zip: 33%|███▎ | 85.8M/262M [00:20<01:06, 2.65MB/s] downloading ml-25m.zip: 33%|███▎ | 86.2M/262M [00:20<01:05, 2.69MB/s] downloading ml-25m.zip: 33%|███▎ | 86.5M/262M [00:20<01:04, 2.70MB/s] downloading ml-25m.zip: 33%|███▎ | 86.8M/262M [00:20<01:02, 2.79MB/s] downloading ml-25m.zip: 33%|███▎ | 87.1M/262M [00:20<01:03, 2.75MB/s] downloading ml-25m.zip: 33%|███▎ | 87.3M/262M [00:20<01:03, 2.74MB/s] downloading ml-25m.zip: 33%|███▎ | 87.6M/262M [00:20<01:04, 2.68MB/s] downloading ml-25m.zip: 34%|███▎ | 87.9M/262M [00:20<01:05, 2.65MB/s] downloading ml-25m.zip: 34%|███▎ | 88.2M/262M [00:20<01:02, 2.78MB/s] downloading ml-25m.zip: 34%|███▍ | 88.5M/262M [00:21<01:00, 2.88MB/s] downloading ml-25m.zip: 34%|███▍ | 88.8M/262M [00:21<01:01, 2.80MB/s] downloading ml-25m.zip: 34%|███▍ | 89.1M/262M [00:21<01:01, 2.79MB/s] downloading ml-25m.zip: 34%|███▍ | 89.4M/262M [00:21<01:04, 2.68MB/s] downloading ml-25m.zip: 34%|███▍ | 89.7M/262M [00:21<01:02, 2.75MB/s] downloading ml-25m.zip: 34%|███▍ | 90.0M/262M [00:21<00:59, 2.89MB/s] downloading ml-25m.zip: 34%|███▍ | 90.3M/262M [00:21<01:00, 2.85MB/s] downloading ml-25m.zip: 35%|███▍ | 90.6M/262M [00:21<01:00, 2.84MB/s] downloading ml-25m.zip: 35%|███▍ | 90.9M/262M [00:21<01:00, 2.82MB/s] downloading ml-25m.zip: 35%|███▍ | 91.3M/262M [00:21<00:59, 2.89MB/s] downloading ml-25m.zip: 35%|███▍ | 91.6M/262M [00:22<00:58, 2.93MB/s] downloading ml-25m.zip: 35%|███▌ | 92.0M/262M [00:22<00:57, 2.98MB/s] downloading ml-25m.zip: 35%|███▌ | 92.3M/262M [00:22<00:55, 3.04MB/s] downloading ml-25m.zip: 35%|███▌ | 92.7M/262M [00:22<00:55, 3.07MB/s] downloading ml-25m.zip: 36%|███▌ | 93.1M/262M [00:22<00:54, 
3.12MB/s] downloading ml-25m.zip: 36%|███▌ | 93.4M/262M [00:22<00:53, 3.15MB/s] downloading ml-25m.zip: 36%|███▌ | 93.8M/262M [00:22<00:53, 3.17MB/s] downloading ml-25m.zip: 36%|███▌ | 94.2M/262M [00:22<00:52, 3.21MB/s] downloading ml-25m.zip: 36%|███▌ | 94.5M/262M [00:23<00:51, 3.24MB/s] downloading ml-25m.zip: 36%|███▌ | 94.9M/262M [00:23<00:51, 3.26MB/s] downloading ml-25m.zip: 36%|███▋ | 95.3M/262M [00:23<00:50, 3.27MB/s] downloading ml-25m.zip: 37%|███▋ | 95.7M/262M [00:23<00:50, 3.30MB/s] downloading ml-25m.zip: 37%|███▋ | 96.1M/262M [00:23<00:50, 3.27MB/s] downloading ml-25m.zip: 37%|███▋ | 96.4M/262M [00:23<00:48, 3.39MB/s] downloading ml-25m.zip: 37%|███▋ | 96.8M/262M [00:23<00:48, 3.38MB/s] downloading ml-25m.zip: 37%|███▋ | 97.1M/262M [00:23<00:49, 3.33MB/s] downloading ml-25m.zip: 37%|███▋ | 97.5M/262M [00:23<00:49, 3.33MB/s] downloading ml-25m.zip: 37%|███▋ | 97.8M/262M [00:24<00:49, 3.33MB/s] downloading ml-25m.zip: 37%|███▋ | 98.2M/262M [00:24<00:48, 3.35MB/s] downloading ml-25m.zip: 38%|███▊ | 98.6M/262M [00:24<00:48, 3.35MB/s] downloading ml-25m.zip: 38%|███▊ | 99.0M/262M [00:24<00:47, 3.46MB/s] downloading ml-25m.zip: 38%|███▊ | 99.4M/262M [00:24<00:47, 3.45MB/s] downloading ml-25m.zip: 38%|███▊ | 99.7M/262M [00:24<00:49, 3.28MB/s] downloading ml-25m.zip: 38%|███▊ | 100M/262M [00:24<00:49, 3.28MB/s] downloading ml-25m.zip: 38%|███▊ | 100M/262M [00:24<00:49, 3.27MB/s] downloading ml-25m.zip: 38%|███▊ | 101M/262M [00:24<00:51, 3.13MB/s] downloading ml-25m.zip: 39%|███▊ | 101M/262M [00:24<00:47, 3.38MB/s] downloading ml-25m.zip: 39%|███▊ | 102M/262M [00:25<00:48, 3.32MB/s] downloading ml-25m.zip: 39%|███▉ | 102M/262M [00:25<00:46, 3.45MB/s] downloading ml-25m.zip: 39%|███▉ | 102M/262M [00:25<00:47, 3.36MB/s] downloading ml-25m.zip: 39%|███▉ | 103M/262M [00:25<00:47, 3.39MB/s] downloading ml-25m.zip: 39%|███▉ | 103M/262M [00:25<00:46, 3.44MB/s] downloading ml-25m.zip: 39%|███▉ | 103M/262M [00:25<00:49, 3.21MB/s] downloading ml-25m.zip: 40%|███▉ | 
104M/262M [00:25<00:47, 3.30MB/s] downloading ml-25m.zip: 40%|███▉ | 104M/262M [00:25<00:45, 3.49MB/s] downloading ml-25m.zip: 40%|███▉ | 104M/262M [00:25<00:45, 3.46MB/s] downloading ml-25m.zip: 40%|████ | 105M/262M [00:26<00:45, 3.45MB/s] downloading ml-25m.zip: 40%|████ | 105M/262M [00:26<00:44, 3.54MB/s] downloading ml-25m.zip: 40%|████ | 106M/262M [00:26<00:46, 3.39MB/s] downloading ml-25m.zip: 40%|████ | 106M/262M [00:26<00:44, 3.54MB/s] downloading ml-25m.zip: 41%|████ | 106M/262M [00:26<00:43, 3.61MB/s] downloading ml-25m.zip: 41%|████ | 107M/262M [00:26<00:41, 3.75MB/s] downloading ml-25m.zip: 41%|████ | 107M/262M [00:26<00:43, 3.60MB/s] downloading ml-25m.zip: 41%|████ | 108M/262M [00:26<00:41, 3.74MB/s] downloading ml-25m.zip: 41%|████▏ | 108M/262M [00:26<00:39, 3.90MB/s] downloading ml-25m.zip: 41%|████▏ | 108M/262M [00:27<00:39, 3.90MB/s] downloading ml-25m.zip: 42%|████▏ | 109M/262M [00:27<00:40, 3.80MB/s] downloading ml-25m.zip: 42%|████▏ | 109M/262M [00:27<00:39, 3.90MB/s] downloading ml-25m.zip: 42%|████▏ | 110M/262M [00:27<00:37, 4.10MB/s] downloading ml-25m.zip: 42%|████▏ | 110M/262M [00:27<00:36, 4.18MB/s] downloading ml-25m.zip: 42%|████▏ | 111M/262M [00:27<00:36, 4.11MB/s] downloading ml-25m.zip: 42%|████▏ | 111M/262M [00:27<00:37, 4.04MB/s] downloading ml-25m.zip: 43%|████▎ | 112M/262M [00:27<00:36, 4.12MB/s] downloading ml-25m.zip: 43%|████▎ | 112M/262M [00:27<00:34, 4.30MB/s] downloading ml-25m.zip: 43%|████▎ | 113M/262M [00:28<00:33, 4.41MB/s] downloading ml-25m.zip: 43%|████▎ | 113M/262M [00:28<00:32, 4.56MB/s] downloading ml-25m.zip: 43%|████▎ | 114M/262M [00:28<00:31, 4.66MB/s] downloading ml-25m.zip: 44%|████▎ | 114M/262M [00:28<00:30, 4.82MB/s] downloading ml-25m.zip: 44%|████▍ | 115M/262M [00:28<00:29, 4.91MB/s] downloading ml-25m.zip: 44%|████▍ | 115M/262M [00:28<00:29, 5.03MB/s] downloading ml-25m.zip: 44%|████▍ | 116M/262M [00:28<00:28, 5.16MB/s] downloading ml-25m.zip: 45%|████▍ | 117M/262M [00:28<00:27, 5.29MB/s] downloading 
ml-25m.zip: 45%|████▍ | 117M/262M [00:28<00:26, 5.44MB/s] downloading ml-25m.zip: 45%|████▌ | 118M/262M [00:29<00:25, 5.59MB/s] downloading ml-25m.zip: 45%|████▌ | 119M/262M [00:29<00:25, 5.72MB/s] downloading ml-25m.zip: 46%|████▌ | 119M/262M [00:29<00:24, 5.86MB/s] downloading ml-25m.zip: 46%|████▌ | 120M/262M [00:29<00:23, 6.04MB/s] downloading ml-25m.zip: 46%|████▌ | 121M/262M [00:29<00:22, 6.20MB/s] downloading ml-25m.zip: 46%|████▋ | 122M/262M [00:29<00:22, 6.29MB/s] downloading ml-25m.zip: 47%|████▋ | 122M/262M [00:29<00:22, 6.26MB/s] downloading ml-25m.zip: 47%|████▋ | 123M/262M [00:29<00:21, 6.48MB/s] downloading ml-25m.zip: 47%|████▋ | 124M/262M [00:29<00:20, 6.84MB/s] downloading ml-25m.zip: 48%|████▊ | 125M/262M [00:30<00:19, 7.17MB/s] downloading ml-25m.zip: 48%|████▊ | 125M/262M [00:30<00:19, 7.18MB/s] downloading ml-25m.zip: 48%|████▊ | 126M/262M [00:30<00:19, 7.10MB/s] downloading ml-25m.zip: 48%|████▊ | 127M/262M [00:30<00:17, 7.56MB/s] downloading ml-25m.zip: 49%|████▉ | 128M/262M [00:30<00:16, 7.96MB/s] downloading ml-25m.zip: 49%|████▉ | 129M/262M [00:30<00:16, 7.95MB/s] downloading ml-25m.zip: 49%|████▉ | 129M/262M [00:30<00:16, 7.88MB/s] downloading ml-25m.zip: 50%|████▉ | 130M/262M [00:30<00:16, 8.09MB/s] downloading ml-25m.zip: 50%|█████ | 131M/262M [00:30<00:15, 8.39MB/s] downloading ml-25m.zip: 51%|█████ | 133M/262M [00:30<00:14, 8.93MB/s] downloading ml-25m.zip: 51%|█████ | 134M/262M [00:31<00:13, 9.22MB/s] downloading ml-25m.zip: 51%|█████▏ | 135M/262M [00:31<00:13, 9.34MB/s] downloading ml-25m.zip: 52%|█████▏ | 135M/262M [00:31<00:13, 9.31MB/s] downloading ml-25m.zip: 52%|█████▏ | 137M/262M [00:31<00:13, 9.64MB/s] downloading ml-25m.zip: 53%|█████▎ | 138M/262M [00:31<00:12, 10.2MB/s] downloading ml-25m.zip: 53%|█████▎ | 139M/262M [00:31<00:11, 10.4MB/s] downloading ml-25m.zip: 53%|█████▎ | 140M/262M [00:31<00:11, 10.4MB/s] downloading ml-25m.zip: 54%|█████▍ | 141M/262M [00:31<00:11, 10.5MB/s] downloading ml-25m.zip: 54%|█████▍ | 
142M/262M [00:31<00:10, 11.2MB/s] downloading ml-25m.zip: 55%|█████▍ | 143M/262M [00:31<00:10, 11.1MB/s] downloading ml-25m.zip: 55%|█████▌ | 145M/262M [00:32<00:10, 11.4MB/s] downloading ml-25m.zip: 56%|█████▌ | 146M/262M [00:32<00:09, 11.7MB/s] downloading ml-25m.zip: 56%|█████▌ | 147M/262M [00:32<00:09, 11.9MB/s] downloading ml-25m.zip: 57%|█████▋ | 148M/262M [00:32<00:09, 11.9MB/s] downloading ml-25m.zip: 57%|█████▋ | 150M/262M [00:32<00:09, 12.4MB/s] downloading ml-25m.zip: 58%|█████▊ | 151M/262M [00:32<00:08, 12.5MB/s] downloading ml-25m.zip: 58%|█████▊ | 152M/262M [00:32<00:08, 13.1MB/s] downloading ml-25m.zip: 59%|█████▊ | 154M/262M [00:32<00:08, 13.3MB/s] downloading ml-25m.zip: 59%|█████▉ | 155M/262M [00:32<00:08, 12.9MB/s] downloading ml-25m.zip: 60%|█████▉ | 156M/262M [00:32<00:08, 13.0MB/s] downloading ml-25m.zip: 60%|██████ | 158M/262M [00:33<00:07, 13.6MB/s] downloading ml-25m.zip: 61%|██████ | 160M/262M [00:33<00:07, 14.2MB/s] downloading ml-25m.zip: 62%|██████▏ | 161M/262M [00:33<00:06, 14.5MB/s] downloading ml-25m.zip: 62%|██████▏ | 163M/262M [00:33<00:06, 15.1MB/s] downloading ml-25m.zip: 63%|██████▎ | 165M/262M [00:33<00:06, 15.5MB/s] downloading ml-25m.zip: 63%|██████▎ | 166M/262M [00:33<00:05, 16.3MB/s] downloading ml-25m.zip: 64%|██████▍ | 168M/262M [00:33<00:05, 16.6MB/s] downloading ml-25m.zip: 65%|██████▍ | 170M/262M [00:33<00:05, 16.5MB/s] downloading ml-25m.zip: 65%|██████▌ | 171M/262M [00:33<00:05, 16.2MB/s] downloading ml-25m.zip: 66%|██████▌ | 173M/262M [00:34<00:05, 16.8MB/s] downloading ml-25m.zip: 67%|██████▋ | 175M/262M [00:34<00:04, 17.6MB/s] downloading ml-25m.zip: 68%|██████▊ | 177M/262M [00:34<00:04, 18.0MB/s] downloading ml-25m.zip: 68%|██████▊ | 179M/262M [00:34<00:04, 18.0MB/s] downloading ml-25m.zip: 69%|██████▉ | 181M/262M [00:34<00:04, 18.1MB/s] downloading ml-25m.zip: 70%|██████▉ | 183M/262M [00:34<00:04, 18.3MB/s] downloading ml-25m.zip: 71%|███████ | 185M/262M [00:34<00:04, 19.2MB/s] downloading ml-25m.zip: 
71%|███████▏ | 187M/262M [00:34<00:03, 19.7MB/s] downloading ml-25m.zip: 72%|███████▏ | 189M/262M [00:34<00:03, 19.9MB/s] downloading ml-25m.zip: 73%|███████▎ | 191M/262M [00:34<00:04, 17.2MB/s] downloading ml-25m.zip: 74%|███████▎ | 193M/262M [00:35<00:04, 14.8MB/s] downloading ml-25m.zip: 74%|███████▍ | 194M/262M [00:35<00:05, 12.7MB/s] downloading ml-25m.zip: 75%|███████▍ | 196M/262M [00:35<00:05, 11.9MB/s] downloading ml-25m.zip: 75%|███████▌ | 197M/262M [00:35<00:05, 11.5MB/s] downloading ml-25m.zip: 76%|███████▌ | 198M/262M [00:35<00:05, 11.0MB/s] downloading ml-25m.zip: 76%|███████▌ | 199M/262M [00:35<00:05, 10.7MB/s] downloading ml-25m.zip: 76%|███████▋ | 200M/262M [00:35<00:05, 10.7MB/s] downloading ml-25m.zip: 77%|███████▋ | 202M/262M [00:36<00:05, 11.1MB/s] downloading ml-25m.zip: 77%|███████▋ | 203M/262M [00:36<00:05, 11.1MB/s] downloading ml-25m.zip: 78%|███████▊ | 204M/262M [00:36<00:05, 11.0MB/s] downloading ml-25m.zip: 78%|███████▊ | 205M/262M [00:36<00:05, 11.2MB/s] downloading ml-25m.zip: 79%|███████▊ | 206M/262M [00:36<00:05, 10.9MB/s] downloading ml-25m.zip: 79%|███████▉ | 207M/262M [00:36<00:04, 11.2MB/s] downloading ml-25m.zip: 80%|███████▉ | 209M/262M [00:36<00:04, 11.3MB/s] downloading ml-25m.zip: 80%|████████ | 210M/262M [00:36<00:04, 11.8MB/s] downloading ml-25m.zip: 81%|████████ | 211M/262M [00:36<00:04, 12.0MB/s] downloading ml-25m.zip: 81%|████████ | 212M/262M [00:36<00:04, 11.9MB/s] downloading ml-25m.zip: 82%|████████▏ | 214M/262M [00:37<00:04, 11.9MB/s] downloading ml-25m.zip: 82%|████████▏ | 215M/262M [00:37<00:03, 12.3MB/s] downloading ml-25m.zip: 82%|████████▏ | 216M/262M [00:37<00:03, 12.2MB/s] downloading ml-25m.zip: 83%|████████▎ | 217M/262M [00:37<00:03, 12.2MB/s] downloading ml-25m.zip: 83%|████████▎ | 219M/262M [00:37<00:03, 12.4MB/s] downloading ml-25m.zip: 84%|████████▍ | 220M/262M [00:37<00:03, 11.8MB/s] downloading ml-25m.zip: 84%|████████▍ | 221M/262M [00:37<00:03, 11.3MB/s] downloading ml-25m.zip: 85%|████████▍ | 
222M/262M [00:37<00:03, 11.0MB/s] downloading ml-25m.zip: 85%|████████▌ | 223M/262M [00:37<00:03, 10.8MB/s] downloading ml-25m.zip: 86%|████████▌ | 224M/262M [00:38<00:03, 10.7MB/s] downloading ml-25m.zip: 86%|████████▌ | 226M/262M [00:38<00:03, 10.3MB/s] downloading ml-25m.zip: 86%|████████▋ | 227M/262M [00:38<00:03, 10.2MB/s] downloading ml-25m.zip: 87%|████████▋ | 228M/262M [00:38<00:03, 10.2MB/s] downloading ml-25m.zip: 87%|████████▋ | 229M/262M [00:38<00:03, 10.2MB/s] downloading ml-25m.zip: 88%|████████▊ | 230M/262M [00:38<00:03, 8.90MB/s] downloading ml-25m.zip: 88%|████████▊ | 231M/262M [00:38<00:03, 9.01MB/s] downloading ml-25m.zip: 88%|████████▊ | 231M/262M [00:38<00:03, 8.96MB/s] downloading ml-25m.zip: 89%|████████▊ | 232M/262M [00:38<00:03, 8.68MB/s] downloading ml-25m.zip: 89%|████████▉ | 233M/262M [00:39<00:03, 8.95MB/s] downloading ml-25m.zip: 89%|████████▉ | 234M/262M [00:39<00:03, 9.01MB/s] downloading ml-25m.zip: 90%|████████▉ | 235M/262M [00:39<00:02, 9.27MB/s] downloading ml-25m.zip: 90%|█████████ | 236M/262M [00:39<00:02, 9.68MB/s] downloading ml-25m.zip: 91%|█████████ | 238M/262M [00:39<00:02, 10.2MB/s] downloading ml-25m.zip: 91%|█████████ | 239M/262M [00:39<00:02, 10.0MB/s] downloading ml-25m.zip: 91%|█████████▏| 240M/262M [00:39<00:02, 10.0MB/s] downloading ml-25m.zip: 92%|█████████▏| 241M/262M [00:39<00:01, 10.6MB/s] downloading ml-25m.zip: 92%|█████████▏| 242M/262M [00:39<00:01, 10.9MB/s] downloading ml-25m.zip: 93%|█████████▎| 243M/262M [00:39<00:01, 10.9MB/s] downloading ml-25m.zip: 93%|█████████▎| 244M/262M [00:40<00:01, 10.8MB/s] downloading ml-25m.zip: 94%|█████████▎| 245M/262M [00:40<00:01, 10.8MB/s] downloading ml-25m.zip: 94%|█████████▍| 246M/262M [00:40<00:01, 9.76MB/s] downloading ml-25m.zip: 94%|█████████▍| 247M/262M [00:40<00:01, 9.20MB/s] downloading ml-25m.zip: 95%|█████████▍| 248M/262M [00:40<00:01, 8.93MB/s] downloading ml-25m.zip: 95%|█████████▌| 249M/262M [00:40<00:01, 8.81MB/s] downloading ml-25m.zip: 96%|█████████▌| 
250M/262M [00:40<00:01, 8.66MB/s] downloading ml-25m.zip: 96%|█████████▌| 251M/262M [00:40<00:01, 8.52MB/s] downloading ml-25m.zip: 96%|█████████▌| 252M/262M [00:40<00:01, 8.49MB/s] downloading ml-25m.zip: 97%|█████████▋| 253M/262M [00:41<00:01, 8.64MB/s] downloading ml-25m.zip: 97%|█████████▋| 254M/262M [00:41<00:00, 8.71MB/s] downloading ml-25m.zip: 97%|█████████▋| 255M/262M [00:41<00:00, 8.65MB/s] downloading ml-25m.zip: 98%|█████████▊| 256M/262M [00:41<00:00, 8.21MB/s] downloading ml-25m.zip: 98%|█████████▊| 256M/262M [00:41<00:00, 8.23MB/s] downloading ml-25m.zip: 98%|█████████▊| 257M/262M [00:41<00:00, 8.04MB/s] downloading ml-25m.zip: 98%|█████████▊| 258M/262M [00:41<00:00, 7.17MB/s] downloading ml-25m.zip: 99%|█████████▉| 259M/262M [00:41<00:00, 6.46MB/s] downloading ml-25m.zip: 99%|█████████▉| 259M/262M [00:41<00:00, 6.29MB/s] downloading ml-25m.zip: 99%|█████████▉| 260M/262M [00:42<00:00, 6.31MB/s] downloading ml-25m.zip: 100%|█████████▉| 261M/262M [00:42<00:00, 6.35MB/s] downloading ml-25m.zip: 100%|█████████▉| 261M/262M [00:42<00:00, 6.45MB/s] downloading ml-25m.zip: 262MB [00:42, 6.18MB/s]
unzipping files: 0%| | 0/8 [00:00<?, ?files/s] unzipping files: 25%|██▌ | 2/8 [00:00<00:00, 10.25files/s] unzipping files: 62%|██████▎ | 5/8 [00:03<00:02, 1.34files/s] unzipping files: 88%|████████▊ | 7/8 [00:04<00:00, 1.30files/s] unzipping files: 100%|██████████| 8/8 [00:04<00:00, 1.61files/s] =============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing from `nvtabular.framework_utils`
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py: 4 warnings tests/unit/systems/test_export.py: 1 warning tests/unit/systems/test_inference_ops.py: 2 warnings tests/unit/systems/test_op_runner.py: 4 warnings tests/unit/systems/hugectr/test_hugectr.py: 1 warning /usr/local/lib/python3.8/dist-packages/cudf/core/dataframe.py:1292: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================== short test summary info ============================ FAILED tests/unit/systems/hugectr/test_hugectr.py::test_training - AttributeE... ======= 1 failed, 17 passed, 2 skipped, 16 warnings in 124.49s (0:02:04) ======= Build step 'Execute shell' marked build as failure Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins12197240355916266001.sh
Click to view CI Results
GitHub pull request #125 of commit 88883cb15df0f5ae6dd0210c587bc5078dc78580, no merge conflicts. Running as SYSTEM Setting status of 88883cb15df0f5ae6dd0210c587bc5078dc78580 to PENDING with url https://10.20.13.93:8080/job/merlin_systems/116/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 88883cb15df0f5ae6dd0210c587bc5078dc78580^{commit} # timeout=10 Checking out Revision 88883cb15df0f5ae6dd0210c587bc5078dc78580 (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 88883cb15df0f5ae6dd0210c587bc5078dc78580 # timeout=10 Commit message: "hugectr op is green for single hot columns" > git rev-list --no-walk c06e88c5289b7ee494c9251d032a0a1aff95944f # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins5826133664549193553.sh ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 44 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py ... [ 9%] tests/unit/systems/test_ensemble_ops.py .. [ 13%] tests/unit/systems/test_export.py . [ 15%] tests/unit/systems/test_graph.py . [ 18%] tests/unit/systems/test_inference_ops.py .. 
[ 22%] tests/unit/systems/test_op_runner.py .... [ 31%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 38%] tests/unit/systems/fil/test_fil.py ....................... [ 90%] tests/unit/systems/fil/test_forest.py ... [ 97%] tests/unit/systems/hugectr/test_hugectr.py . [100%]
=============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing from `nvtabular.framework_utils`
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ================== 44 passed, 13 warnings in 78.85s (0:01:18) ================== Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins3348732409581959692.sh
Click to view CI Results
GitHub pull request #125 of commit c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d, no merge conflicts. Running as SYSTEM Setting status of c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d to PENDING with url https://10.20.13.93:8080/job/merlin_systems/117/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d^{commit} # timeout=10 Checking out Revision c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d # timeout=10 Commit message: "Merge branch 'main' of https://github.com/NVIDIA-Merlin/systems into add-hugectr-op" > git rev-list --no-walk 88883cb15df0f5ae6dd0210c587bc5078dc78580 # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins4541704241858875195.sh ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 48 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py .... [ 10%] tests/unit/systems/test_ensemble_ops.py .. [ 14%] tests/unit/systems/test_export.py . [ 16%] tests/unit/systems/test_graph.py . 
[ 18%] tests/unit/systems/test_inference_ops.py .. [ 22%] tests/unit/systems/test_op_runner.py .... [ 31%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 37%] tests/unit/systems/fil/test_fil.py .......................... [ 91%] tests/unit/systems/fil/test_forest.py ... [ 97%] tests/unit/systems/hugectr/test_hugectr.py . [100%]
=============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing from `nvtabular.framework_utils`
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4] tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl] /usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute
n_features_
was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_`
instead. warnings.warn(msg, category=FutureWarning)-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ================== 48 passed, 18 warnings in 92.52s (0:01:32) ================== Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins14795559711341224238.sh
Click to view CI Results
GitHub pull request #125 of commit 7be0df83b88908a01c093ff5d1a77979daeaee8c, no merge conflicts. Running as SYSTEM Setting status of 7be0df83b88908a01c093ff5d1a77979daeaee8c to PENDING with url https://10.20.13.93:8080/job/merlin_systems/118/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 7be0df83b88908a01c093ff5d1a77979daeaee8c^{commit} # timeout=10 Checking out Revision 7be0df83b88908a01c093ff5d1a77979daeaee8c (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 7be0df83b88908a01c093ff5d1a77979daeaee8c # timeout=10 Commit message: "add skip for module and add init" > git rev-list --no-walk c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins10365529385200628683.sh ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 48 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py .... [ 10%] tests/unit/systems/test_ensemble_ops.py .. [ 14%] tests/unit/systems/test_export.py . [ 16%] tests/unit/systems/test_graph.py . [ 18%] tests/unit/systems/test_inference_ops.py .. 
[ 22%] tests/unit/systems/test_op_runner.py .... [ 31%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 37%] tests/unit/systems/fil/test_fil.py .......................... [ 91%] tests/unit/systems/fil/test_forest.py ... [ 97%] tests/unit/systems/hugectr/test_hugectr.py . [100%]
=============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing from `nvtabular.framework_utils`
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4] tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl] /usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute
n_features_
was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_`
instead. warnings.warn(msg, category=FutureWarning)-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ================= 48 passed, 18 warnings in 418.29s (0:06:58) ================== Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins10001184135030396013.sh
Click to view CI Results
GitHub pull request #125 of commit 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb, no merge conflicts. Running as SYSTEM Setting status of 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb to PENDING with url https://10.20.13.93:8080/job/merlin_systems/119/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb^{commit} # timeout=10 Checking out Revision 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb # timeout=10 Commit message: "remove common folder in tests and remove unneeded lines in test hugectr" > git rev-list --no-walk 7be0df83b88908a01c093ff5d1a77979daeaee8c # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins8677967745791111808.sh ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 48 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py .... [ 10%] tests/unit/systems/test_ensemble_ops.py .. [ 14%] tests/unit/systems/test_export.py . [ 16%] tests/unit/systems/test_graph.py . 
[ 18%] tests/unit/systems/test_inference_ops.py .. [ 22%] tests/unit/systems/test_op_runner.py .... [ 31%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 37%] tests/unit/systems/fil/test_fil.py .......................... [ 91%] tests/unit/systems/fil/test_forest.py ... [ 97%] tests/unit/systems/hugectr/test_hugectr.py . [100%]
=============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing from `nvtabular.framework_utils`
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4] tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl] /usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute
n_features_
was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_`
instead. warnings.warn(msg, category=FutureWarning)-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ================= 48 passed, 18 warnings in 316.87s (0:05:16) ================== Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins17829727239229261377.sh
Click to view CI Results
GitHub pull request #125 of commit 80521b272bf84315d24b2f8fb94a28011e4aedf3, no merge conflicts. Running as SYSTEM Setting status of 80521b272bf84315d24b2f8fb94a28011e4aedf3 to PENDING with url https://10.20.13.93:8080/job/merlin_systems/120/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 80521b272bf84315d24b2f8fb94a28011e4aedf3^{commit} # timeout=10 Checking out Revision 80521b272bf84315d24b2f8fb94a28011e4aedf3 (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 80521b272bf84315d24b2f8fb94a28011e4aedf3 # timeout=10 Commit message: "got hugectr wrapper op PredictHugeCTR working correctly" > git rev-list --no-walk 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins1443726222047821983.sh ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 49 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py .... [ 10%] tests/unit/systems/test_ensemble_ops.py .. [ 14%] tests/unit/systems/test_export.py . [ 16%] tests/unit/systems/test_graph.py . [ 18%] tests/unit/systems/test_inference_ops.py .. 
[ 22%] tests/unit/systems/test_op_runner.py .... [ 30%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%] tests/unit/systems/fil/test_fil.py .......................... [ 89%] tests/unit/systems/fil/test_forest.py F.. [ 95%] tests/unit/systems/hugectr/test_hugectr.py sF [100%]
=================================== FAILURES =================================== ____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-7/test_load_from_config0')
def test_load_from_config(tmpdir): rows = 200 num_features = 16 X, y = sklearn.datasets.make_regression( n_samples=rows, n_features=num_features, n_informative=num_features // 3, random_state=0, ) model = xgboost.XGBRegressor() model.fit(X, y) feature_names = [str(i) for i in range(num_features)] input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names]) output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)]) config = PredictForest(model, input_schema).export( tmpdir, input_schema, output_schema, node_id=2 ) node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False} }
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}} E Differing items: E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}} E Full diff: E { E 'output__0': {'dtype': 'float32', E 'is_list': False, E - 'is_ragged': False}, E ? - E + 'is_ragged': False, E + 'tags': []}, E }
tests/unit/systems/fil/test_forest.py:57: AssertionError _____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0')
def test_predict_hugectr(tmpdir): cat_dtypes = {"a": int, "b": int, "c": int} categorical_columns = ["a", "b", "c"] gdf = make_df( { "a": np.arange(64, dtype=np.int64), "b": np.arange(64, dtype=np.int64), "c": np.arange(64, dtype=np.int64), "d": np.random.rand(64).tolist(), "label": [0] * 64, }, ) gdf["label"] = gdf["label"].astype("float32") gdf["d"] = gdf["d"].astype("float32") train_dataset = nvt.Dataset(gdf) dense_columns = ["d"] dict_dtypes = {} col_schemas = train_dataset.schema.column_schemas for col in dense_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS) dict_dtypes[col] = np.float32 for col in categorical_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL) dict_dtypes[col] = np.int64 for col in ["label"]: col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET) dict_dtypes[col] = np.float32 train_path = os.path.join(tmpdir, "train/") os.mkdir(train_path) train_dataset.to_parquet( output_path=train_path, shuffle=nvt.io.Shuffle.PER_PARTITION, cats=categorical_columns, conts=dense_columns, labels=["label"], dtypes=dict_dtypes, ) embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)} total_cardinality = 0 slot_sizes = [] for column in cat_dtypes: slot_sizes.append(embeddings[column][0]) total_cardinality += embeddings[column][0] # slot sizes = list of caridinalities per column, total is sum of individual model = _run_model(slot_sizes, train_path, len(dense_columns)) model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0]) model_repository_path = os.path.join(tmpdir, "model_repository") input_schema = train_dataset.schema triton_chain = input_schema.column_names >> model_op ens = Ensemble(triton_chain, input_schema) os.makedirs(model_repository_path) enc_config, node_configs = ens.export(model_repository_path) assert enc_config assert len(node_configs) == 1 assert node_configs[0].name == "0_predicthugectr" df = train_dataset.to_ddf().compute()[:5] dense, cats, rowptr = _convert(df, 
slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path, ["OUTPUT0"], df, "ensemble_model", backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json", )
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver with run_triton_server(tmpdir, backend_config=backend_config) as client: /usr/lib/python3.8/contextlib.py:113: in enter return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository' backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager def run_triton_server(modelpath, backend_config="tensorflow,version=2"): """This function starts up a Triton server instance and returns a client to it. Parameters ---------- modelpath : string The path to the model to load. Yields ------ client: tritonclient.InferenceServerClient The client connected to the Triton server. """ cmdline = [ TRITON_SERVER_PATH, "--model-repository", modelpath, f"--backend-config={backend_config}", ] env = os.environ.copy() env["CUDA_VISIBLE_DEVICES"] = "0" with subprocess.Popen(cmdline, env=env) as process: try: with grpcclient.InferenceServerClient("localhost:8001") as client: # wait until server is ready for _ in range(60): if process.poll() is not None: retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError ----------------------------- Captured stdout call ----------------------------- HugeCTR Version: 3.7 ====================================================Model Init===================================================== [HCTR][15:29:13.759][WARNING][RK0][main]: The model name is not specified when creating the solver. [HCTR][15:29:13.759][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled. [HCTR][15:29:13.759][INFO][RK0][main]: Global seed is 2631192487 [HCTR][15:29:13.801][INFO][RK0][main]: Device to NUMA mapping: GPU 0 -> node 0 [HCTR][15:29:14.358][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled. [HCTR][15:29:14.358][INFO][RK0][main]: Start all2all warmup [HCTR][15:29:14.358][INFO][RK0][main]: End all2all warmup [HCTR][15:29:14.358][INFO][RK0][main]: Using All-reduce algorithm: NCCL [HCTR][15:29:14.359][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB [HCTR][15:29:14.359][INFO][RK0][main]: num of DataReader workers: 1 [HCTR][15:29:14.359][INFO][RK0][main]: Vocabulary size: 0 [HCTR][15:29:14.359][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362 [HCTR][15:29:14.359][DEBUG][RK0][tid #140578230626048]: file_name_ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][15:29:14.360][DEBUG][RK0][tid #140578222233344]: file_name_ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][15:29:14.361][INFO][RK0][main]: Graph analysis to resolve tensor dependency ===================================================Model Compile=================================================== [HCTR][15:29:14.654][INFO][RK0][main]: gpu0 start to init embedding [HCTR][15:29:14.654][INFO][RK0][main]: gpu0 init embedding done [HCTR][15:29:14.655][INFO][RK0][main]: Starting AUC NCCL warm-up [HCTR][15:29:14.656][INFO][RK0][main]: Warm-up done 
===================================================Model Summary=================================================== [HCTR][15:29:14.656][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— Layer Type Input Name Output Name Output Shape
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1BinaryCrossEntropyLoss fc2 loss
label=====================================================Model Fit===================================================== [HCTR][15:29:14.656][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20 [HCTR][15:29:14.656][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10 [HCTR][15:29:14.656][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10 [HCTR][15:29:14.656][INFO][RK0][main]: Dense network trainable: True [HCTR][15:29:14.656][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True [HCTR][15:29:14.656][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True [HCTR][15:29:14.656][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000 [HCTR][15:29:14.656][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000 [HCTR][15:29:14.656][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/file_list.txt [HCTR][15:29:14.656][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/file_list.txt [HCTR][15:29:14.661][DEBUG][RK0][tid #140578230626048]: file_name /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows 64 [HCTR][15:29:14.666][DEBUG][RK0][tid #140578230626048]: file_name_ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][15:29:14.670][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][15:29:14.671][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][15:29:14.695][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][15:29:14.731][INFO][RK0][main]: Done [HCTR][15:29:14.750][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][15:29:14.787][INFO][RK0][main]: Done [HCTR][15:29:14.789][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][15:29:14.789][INFO][RK0][main]: Dumping dense weights to file, successful 
[HCTR][15:29:14.789][INFO][RK0][main]: Dumping dense optimizer states to file, successful [HCTR][15:29:14.794][DEBUG][RK0][tid #140578230626048]: file_name_ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][15:29:14.796][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s. [HCTR][15:29:14.798][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully [HCTR][15:29:14.799][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][15:29:14.799][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][15:29:14.817][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][15:29:14.852][INFO][RK0][main]: Done [HCTR][15:29:14.871][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][15:29:14.908][INFO][RK0][main]: Done [HCTR][15:29:14.910][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][15:29:14.910][INFO][RK0][main]: Dumping dense weights to file, successful [HCTR][15:29:14.910][INFO][RK0][main]: Dumping dense optimizer states to file, successful [HCTR][15:29:17.797][INFO][RK0][main]: default_emb_vec_value is not specified using default: 0 [HCTR][15:29:17.797][INFO][RK0][main]: Creating HashMap CPU database backend... [HCTR][15:29:17.797][INFO][RK0][main]: Volatile DB: initial cache rate = 1 [HCTR][15:29:17.797][INFO][RK0][main]: Volatile DB: cache missed embeddings = 0 [HCTR][15:29:18.056][INFO][RK0][main]: Table: hps_et.0_hugectr.sparse_embedding1; cached 64 / 64 embeddings in volatile database (PreallocatedHashMapBackend); load: 64 / 18446744073709551615 (0.00%). [HCTR][15:29:18.057][DEBUG][RK0][main]: Real-time subscribers created! [HCTR][15:29:18.057][INFO][RK0][main]: Create embedding cache in device 0. 
[HCTR][15:29:18.057][INFO][RK0][main]: Use GPU embedding cache: True, cache size percentage: 0.500000 [HCTR][15:29:18.057][INFO][RK0][main]: Configured cache hit rate threshold: 0.900000 [HCTR][15:29:18.057][INFO][RK0][main]: The size of thread pool: 16 [HCTR][15:29:18.057][INFO][RK0][main]: The size of worker memory pool: 4 [HCTR][15:29:18.057][INFO][RK0][main]: The size of refresh memory pool: 1 [HCTR][15:29:18.074][INFO][RK0][main]: Global seed is 1817110376 [HCTR][15:29:18.699][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled. [HCTR][15:29:18.699][INFO][RK0][main]: Start all2all warmup [HCTR][15:29:18.699][INFO][RK0][main]: End all2all warmup [HCTR][15:29:18.700][INFO][RK0][main]: Create inference session on device: 0 [HCTR][15:29:18.700][INFO][RK0][main]: Model name: 0_hugectr [HCTR][15:29:18.700][INFO][RK0][main]: Use mixed precision: False [HCTR][15:29:18.700][INFO][RK0][main]: Use cuda graph: True [HCTR][15:29:18.700][INFO][RK0][main]: Max batchsize: 64 [HCTR][15:29:18.700][INFO][RK0][main]: Use I64 input key: True [HCTR][15:29:18.700][INFO][RK0][main]: start create embedding for inference [HCTR][15:29:18.700][INFO][RK0][main]: sparse_input name data1 [HCTR][15:29:18.700][INFO][RK0][main]: create embedding for inference success [HCTR][15:29:18.700][INFO][RK0][main]: Inference stage skip BinaryCrossEntropyLoss layer, replaced by Sigmoid layer ----------------------------- Captured stderr call ----------------------------- I0701 15:29:15.199327 7869 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f2414000000' with size 268435456 I0701 15:29:15.200112 7869 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864 I0701 15:29:15.203367 7869 model_repository_manager.cc:1191] loading: 0_predicthugectr:1 I0701 15:29:15.303720 7869 model_repository_manager.cc:1191] loading: 0_hugectr:1 I0701 15:29:15.311508 7869 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0) 0701 
15:29:17.406730 7909 pb_stub.cc:301] Failed to initialize Python stub: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr'
At:
(973): _find_and_load_unlocked (991): _find_and_load (1014): _gcd_import /usr/lib/python3.8/importlib/__init__.py(127): import_module /usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): __init__ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize E0701 15:29:17.780691 7869 model_repository_manager.cc:1348] failed to load '0_predicthugectr' version 1: Internal: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr'
At:
(973): _find_and_load_unlocked (991): _find_and_load (1014): _gcd_import /usr/lib/python3.8/importlib/init.py(127): import_module /usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize I0701 15:29:17.796763 7869 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr I0701 15:29:17.796790 7869 hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9 I0701 15:29:17.796800 7869 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.9 I0701 15:29:17.796807 7869 hugectr.cc:1772] The HugeCTR backend Repository location: /opt/tritonserver/backends/hugectr I0701 15:29:17.796815 7869 hugectr.cc:1781] The HugeCTR backend configuration: {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}} I0701 15:29:17.796839 7869 hugectr.cc:345] *****Parsing Parameter Server Configuration from /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json I0701 15:29:17.796897 7869 hugectr.cc:366] Support 64-bit keys = 1 I0701 15:29:17.796932 7869 hugectr.cc:591] Model name = 0_hugectr I0701 15:29:17.796941 7869 hugectr.cc:600] Model '0_hugectr' -> network file = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json I0701 15:29:17.796949 7869 hugectr.cc:607] Model '0_hugectr' -> max. 
batch size = 64 I0701 15:29:17.796955 7869 hugectr.cc:613] Model '0_hugectr' -> dense model file = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/_dense_0.model I0701 15:29:17.796965 7869 hugectr.cc:619] Model '0_hugectr' -> sparse model files = [/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_sparse_0.model] I0701 15:29:17.796973 7869 hugectr.cc:630] Model '0_hugectr' -> use GPU embedding cache = 1 I0701 15:29:17.796990 7869 hugectr.cc:639] Model '0_hugectr' -> hit rate threshold = 0.9 I0701 15:29:17.796998 7869 hugectr.cc:647] Model '0_hugectr' -> per model GPU cache = 0.5 I0701 15:29:17.797015 7869 hugectr.cc:664] Model '0_hugectr' -> use_mixed_precision = 0 I0701 15:29:17.797024 7869 hugectr.cc:671] Model '0_hugectr' -> scaler = 1 I0701 15:29:17.797031 7869 hugectr.cc:677] Model '0_hugectr' -> use_algorithm_search = 1 I0701 15:29:17.797037 7869 hugectr.cc:685] Model '0_hugectr' -> use_cuda_graph = 1 I0701 15:29:17.797044 7869 hugectr.cc:692] Model '0_hugectr' -> num. pool worker buffers = 4 I0701 15:29:17.797051 7869 hugectr.cc:700] Model '0_hugectr' -> num. 
pool refresh buffers = 1 I0701 15:29:17.797081 7869 hugectr.cc:708] Model '0_hugectr' -> cache refresh rate per iteration = 0.2 I0701 15:29:17.797090 7869 hugectr.cc:717] Model '0_hugectr' -> deployed device list = [0] I0701 15:29:17.797098 7869 hugectr.cc:725] Model '0_hugectr' -> default value for each table = [0] I0701 15:29:17.797105 7869 hugectr.cc:733] Model '0_hugectr' -> maxnum_des_feature_per_sample = 1 I0701 15:29:17.797112 7869 hugectr.cc:741] Model '0_hugectr' -> refresh_delay = 0 I0701 15:29:17.797118 7869 hugectr.cc:747] Model '0_hugectr' -> refresh_interval = 0 I0701 15:29:17.797126 7869 hugectr.cc:755] Model '0_hugectr' -> maxnum_catfeature_query_per_table_per_sample list = [3] I0701 15:29:17.797133 7869 hugectr.cc:766] Model '0_hugectr' -> embedding_vecsize_per_table list = [16] I0701 15:29:17.797142 7869 hugectr.cc:773] Model '0_hugectr' -> embedding model names = [, sparse_embedding1] I0701 15:29:17.797148 7869 hugectr.cc:780] Model '0_hugectr' -> label_dim = 1 I0701 15:29:17.797154 7869 hugectr.cc:785] Model '0_hugectr' -> the number of slots = 3 I0701 15:29:17.797167 7869 hugectr.cc:806] *****The HugeCTR Backend Parameter Server is creating... ***** I0701 15:29:17.797315 7869 hugectr.cc:814] ***** Parameter Server(Int64) is creating... ***** I0701 15:29:18.062446 7869 hugectr.cc:825] *****The HugeCTR Backend Backend created the Parameter Server successfully! 
***** I0701 15:29:18.062495 7869 hugectr.cc:1844] TRITONBACKEND_ModelInitialize: 0_hugectr (version 1) I0701 15:29:18.062503 7869 hugectr.cc:1857] Repository location: /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr I0701 15:29:18.062510 7869 hugectr.cc:1872] backend configuration in mode: {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}} I0701 15:29:18.062521 7869 hugectr.cc:1888] Parsing the latest Parameter Server json config file for deploying model 0_hugectr online I0701 15:29:18.062528 7869 hugectr.cc:1893] Hierarchical PS version is 0 and the current Model Version is 1 I0701 15:29:18.062534 7869 hugectr.cc:345] *****Parsing Parameter Server Configuration from /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json I0701 15:29:18.062576 7869 hugectr.cc:366] Support 64-bit keys = 1 I0701 15:29:18.062597 7869 hugectr.cc:591] Model name = 0_hugectr I0701 15:29:18.062606 7869 hugectr.cc:600] Model '0_hugectr' -> network file = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json I0701 15:29:18.062613 7869 hugectr.cc:607] Model '0_hugectr' -> max. 
batch size = 64 I0701 15:29:18.062619 7869 hugectr.cc:613] Model '0_hugectr' -> dense model file = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/_dense_0.model I0701 15:29:18.062628 7869 hugectr.cc:619] Model '0_hugectr' -> sparse model files = [/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_sparse_0.model] I0701 15:29:18.062635 7869 hugectr.cc:630] Model '0_hugectr' -> use GPU embedding cache = 1 I0701 15:29:18.062645 7869 hugectr.cc:639] Model '0_hugectr' -> hit rate threshold = 0.9 I0701 15:29:18.062653 7869 hugectr.cc:647] Model '0_hugectr' -> per model GPU cache = 0.5 I0701 15:29:18.062666 7869 hugectr.cc:664] Model '0_hugectr' -> use_mixed_precision = 0 I0701 15:29:18.062674 7869 hugectr.cc:671] Model '0_hugectr' -> scaler = 1 I0701 15:29:18.062680 7869 hugectr.cc:677] Model '0_hugectr' -> use_algorithm_search = 1 I0701 15:29:18.062686 7869 hugectr.cc:685] Model '0_hugectr' -> use_cuda_graph = 1 I0701 15:29:18.062693 7869 hugectr.cc:692] Model '0_hugectr' -> num. pool worker buffers = 4 I0701 15:29:18.062700 7869 hugectr.cc:700] Model '0_hugectr' -> num. 
pool refresh buffers = 1 I0701 15:29:18.062707 7869 hugectr.cc:708] Model '0_hugectr' -> cache refresh rate per iteration = 0.2 I0701 15:29:18.062714 7869 hugectr.cc:717] Model '0_hugectr' -> deployed device list = [0] I0701 15:29:18.062746 7869 hugectr.cc:725] Model '0_hugectr' -> default value for each table = [0] I0701 15:29:18.062753 7869 hugectr.cc:733] Model '0_hugectr' -> maxnum_des_feature_per_sample = 1 I0701 15:29:18.062759 7869 hugectr.cc:741] Model '0_hugectr' -> refresh_delay = 0 I0701 15:29:18.062766 7869 hugectr.cc:747] Model '0_hugectr' -> refresh_interval = 0 I0701 15:29:18.062773 7869 hugectr.cc:755] Model '0_hugectr' -> maxnum_catfeature_query_per_table_per_sample list = [3] I0701 15:29:18.062780 7869 hugectr.cc:766] Model '0_hugectr' -> embedding_vecsize_per_table list = [16] I0701 15:29:18.062788 7869 hugectr.cc:773] Model '0_hugectr' -> embedding model names = [, sparse_embedding1] I0701 15:29:18.062794 7869 hugectr.cc:780] Model '0_hugectr' -> label_dim = 1 I0701 15:29:18.062800 7869 hugectr.cc:785] Model '0_hugectr' -> the number of slots = 3 I0701 15:29:18.063198 7869 hugectr.cc:1078] Verifying model configuration: { "name": "0_hugectr", "platform": "", "backend": "hugectr", "version_policy": { "latest": { "num_versions": 1 } }, "max_batch_size": 64, "input": [ { "name": "DES", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "CATCOLUMN", "data_type": "TYPE_INT64", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "ROWINDEX", "data_type": "TYPE_INT32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false } ], "output": [ { "name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [ -1 ], "label_filename": "", "is_shape_tensor": false } ], "batch_input": [], "batch_output": [], "optimization": { "priority": 
"PRIORITY_DEFAULT", "input_pinned_memory": { "enable": true }, "output_pinned_memory": { "enable": true }, "gather_kernel_buffer_threshold": 0, "eager_batching": false }, "instance_group": [ { "name": "0_hugectr_0", "kind": "KIND_GPU", "count": 1, "gpus": [ 0 ], "secondary_devices": [], "profile": [], "passive": false, "host_policy": "" } ], "default_model_filename": "", "cc_model_filenames": {}, "metric_tags": {}, "parameters": { "label_dim": { "string_value": "1" }, "max_nnz": { "string_value": "2" }, "gpucacheper": { "string_value": "0.5" }, "embedding_vector_size": { "string_value": "16" }, "des_feature_num": { "string_value": "1" }, "slot_sizes": { "string_value": "[[64, 64, 64]]" }, "gpucache": { "string_value": "true" }, "embeddingkey_long_type": { "string_value": "true" }, "slots": { "string_value": "3" }, "cat_feature_num": { "string_value": "3" }, "config": { "string_value": "/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json" } }, "model_warmup": [] } I0701 15:29:18.063227 7869 hugectr.cc:1164] The model configuration: { "name": "0_hugectr", "platform": "", "backend": "hugectr", "version_policy": { "latest": { "num_versions": 1 } }, "max_batch_size": 64, "input": [ { "name": "DES", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "CATCOLUMN", "data_type": "TYPE_INT64", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "ROWINDEX", "data_type": "TYPE_INT32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false } ], "output": [ { "name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [ -1 ], "label_filename": "", "is_shape_tensor": false } ], "batch_input": [], "batch_output": [], "optimization": { "priority": "PRIORITY_DEFAULT", "input_pinned_memory": { "enable": true }, 
"output_pinned_memory": { "enable": true }, "gather_kernel_buffer_threshold": 0, "eager_batching": false }, "instance_group": [ { "name": "0_hugectr_0", "kind": "KIND_GPU", "count": 1, "gpus": [ 0 ], "secondary_devices": [], "profile": [], "passive": false, "host_policy": "" } ], "default_model_filename": "", "cc_model_filenames": {}, "metric_tags": {}, "parameters": { "label_dim": { "string_value": "1" }, "max_nnz": { "string_value": "2" }, "gpucacheper": { "string_value": "0.5" }, "embedding_vector_size": { "string_value": "16" }, "des_feature_num": { "string_value": "1" }, "slot_sizes": { "string_value": "[[64, 64, 64]]" }, "gpucache": { "string_value": "true" }, "embeddingkey_long_type": { "string_value": "true" }, "slots": { "string_value": "3" }, "cat_feature_num": { "string_value": "3" }, "config": { "string_value": "/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json" } }, "model_warmup": [] } I0701 15:29:18.063249 7869 hugectr.cc:1209] slots set = 3 I0701 15:29:18.063256 7869 hugectr.cc:1213] slots set = 3 I0701 15:29:18.063263 7869 hugectr.cc:1221] desene number = 1 I0701 15:29:18.063270 7869 hugectr.cc:1239] The max categorical feature number = 3 I0701 15:29:18.063278 7869 hugectr.cc:1244] embedding size = 16 I0701 15:29:18.063284 7869 hugectr.cc:1250] embedding size = 16 I0701 15:29:18.063291 7869 hugectr.cc:1256] maxnnz = 2 I0701 15:29:18.063299 7869 hugectr.cc:1265] refresh_interval = 0 I0701 15:29:18.063306 7869 hugectr.cc:1273] refresh_delay = 0 I0701 15:29:18.063313 7869 hugectr.cc:1281] HugeCTR model config path = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json I0701 15:29:18.063321 7869 hugectr.cc:1329] support mixed_precision = 0 I0701 15:29:18.063331 7869 hugectr.cc:1348] gpu cache per = 0.5 I0701 15:29:18.063339 7869 hugectr.cc:1366] hit-rate threshold = 0.9 I0701 15:29:18.063345 7869 hugectr.cc:1374] Label dim = 1 I0701 15:29:18.063353 7869 
hugectr.cc:1383] support 64-bit embedding key = 1 I0701 15:29:18.063359 7869 hugectr.cc:1394] Model_Inference_Para.max_batchsize: 64 I0701 15:29:18.063372 7869 hugectr.cc:1398] max_batch_size in model config.pbtxt is 64 I0701 15:29:18.063380 7869 hugectr.cc:1468] ******Creating Embedding Cache for model 0_hugectr in device 0 I0701 15:29:18.063386 7869 hugectr.cc:1495] ******Creating Embedding Cache for model 0_hugectr successfully I0701 15:29:18.063743 7869 hugectr.cc:1996] TRITONBACKEND_ModelInstanceInitialize: 0_hugectr_0 (device 0) I0701 15:29:18.063755 7869 hugectr.cc:1637] Triton Model Instance Initialization on device 0 I0701 15:29:18.063762 7869 hugectr.cc:1647] Dense Feature buffer allocation: I0701 15:29:18.074275 7869 hugectr.cc:1654] Categorical Feature buffer allocation: I0701 15:29:18.074316 7869 hugectr.cc:1672] Categorical Row Index buffer allocation: I0701 15:29:18.074330 7869 hugectr.cc:1680] Predict result buffer allocation: I0701 15:29:18.074344 7869 hugectr.cc:2009] Loading HugeCTR Model I0701 15:29:18.074351 7869 hugectr.cc:1698] The model origin json configuration file path is: /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json I0701 15:29:18.847134 7869 hugectr.cc:1706] ******Loading HugeCTR model successfully I0701 15:29:18.847304 7869 model_repository_manager.cc:1345] successfully loaded '0_hugectr' version 1 E0701 15:29:18.847379 7869 model_repository_manager.cc:1551] Invalid argument: ensemble 'ensemble_model' depends on '0_predicthugectr' which has no loaded version I0701 15:29:18.847631 7869 server.cc:556] +------------------+------+ | Repository Agent | Path | +------------------+------+ +------------------+------+
I0701 15:29:18.847702 7869 server.cc:583] +---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Backend | Path | Config | +---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} | | hugectr | /opt/tritonserver/backends/hugectr/libtriton_hugectr.so | {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}} | +---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0701 15:29:18.847764 7869 server.cc:626] +------------------+---------+----------------------------------------------------------------------------------------------------------------------+ | Model | Version | Status | +------------------+---------+----------------------------------------------------------------------------------------------------------------------+ | 0_hugectr | 1 | READY | | 0_predicthugectr | 1 | UNAVAILABLE: Internal: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr' | | | | | | | | At: | | | |
(973): _find_and_load_unlocked | | | | (991): _find_and_load | | | | (1014): _gcd_import | | | | /usr/lib/python3.8/importlib/init.py(127): import_module | | | | /usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init | | | | /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize | +------------------+---------+----------------------------------------------------------------------------------------------------------------------+ I0701 15:29:18.875669 7869 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB I0701 15:29:18.876569 7869 tritonserver.cc:2138] +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Option | Value | +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | server_id | triton | | server_version | 2.22.0 | | server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace | | model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository | | model_control_mode | MODE_NONE | | strict_model_config | 1 | | rate_limit | OFF | | pinned_memory_pool_byte_size | 268435456 | | cuda_memory_pool_byte_size{0} | 67108864 | | response_cache_byte_size | 0 | | min_supported_compute_capability | 6.0 | | strict_readiness | 1 | | exit_timeout | 30 | 
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0701 15:29:18.876602 7869 server.cc:257] Waiting for in-flight requests to complete. I0701 15:29:18.876611 7869 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences I0701 15:29:18.876621 7869 model_repository_manager.cc:1223] unloading: 0_hugectr:1 I0701 15:29:18.876657 7869 server.cc:288] All models are stopped, unloading models I0701 15:29:18.876665 7869 server.cc:295] Timeout 30: Found 1 live models and 0 in-flight non-inference requests I0701 15:29:18.876749 7869 hugectr.cc:2026] TRITONBACKEND_ModelInstanceFinalize: delete instance state I0701 15:29:18.890774 7869 hugectr.cc:1957] TRITONBACKEND_ModelFinalize: delete model state I0701 15:29:18.899492 7869 hugectr.cc:1505] ******Destorying Embedding Cache for model 0_hugectr successfully I0701 15:29:18.899532 7869 model_repository_manager.cc:1328] successfully unloaded '0_hugectr' version 1 I0701 15:29:19.876698 7869 server.cc:295] Timeout 29: Found 0 live models and 0 in-flight non-inference requests I0701 15:29:19.876772 7869 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend W0701 15:29:19.995394 7869 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0701 15:29:19.995462 7869 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 error: creating server: Internal - failed to load all models W0701 15:29:20.995623 7869 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0701 15:29:20.995678 7869 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 =============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing fromnvtabular.framework_utils
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4] tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl] /usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute
n_features_
was deprecated in version 1.0 and will be removed in 1.2. Usen_features_in_
instead. warnings.warn(msg, category=FutureWarning)-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================== short test summary info ============================ FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti... FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run... ======= 2 failed, 46 passed, 1 skipped, 18 warnings in 173.06s (0:02:53) ======= Build step 'Execute shell' marked build as failure Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins4678809899471159306.sh
Click to view CI Results
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts. Running as SYSTEM Setting status of 088570474e008fa0580cb7ae6de1c4a2bceadf4e to PENDING with url https://10.20.13.93:8080/job/merlin_systems/121/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 088570474e008fa0580cb7ae6de1c4a2bceadf4e^{commit} # timeout=10 Checking out Revision 088570474e008fa0580cb7ae6de1c4a2bceadf4e (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10 Commit message: "added check for categorical columns in constructor" > git rev-list --no-walk 80521b272bf84315d24b2f8fb94a28011e4aedf3 # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins16330791337592217810.sh ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 50 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py .... [ 10%] tests/unit/systems/test_ensemble_ops.py .. [ 14%] tests/unit/systems/test_export.py . [ 16%] tests/unit/systems/test_graph.py . [ 18%] tests/unit/systems/test_inference_ops.py .. 
[ 22%] tests/unit/systems/test_op_runner.py .... [ 30%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%] tests/unit/systems/fil/test_fil.py .......................... [ 88%] tests/unit/systems/fil/test_forest.py F.. [ 94%] tests/unit/systems/hugectr/test_hugectr.py sF. [100%]
=================================== FAILURES =================================== ____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-8/test_load_from_config0')
def test_load_from_config(tmpdir): rows = 200 num_features = 16 X, y = sklearn.datasets.make_regression( n_samples=rows, n_features=num_features, n_informative=num_features // 3, random_state=0, ) model = xgboost.XGBRegressor() model.fit(X, y) feature_names = [str(i) for i in range(num_features)] input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names]) output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)]) config = PredictForest(model, input_schema).export( tmpdir, input_schema, output_schema, node_id=2 ) node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False} }
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}} E Differing items: E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}} E Full diff: E { E 'output__0': {'dtype': 'float32', E 'is_list': False, E - 'is_ragged': False}, E ? - E + 'is_ragged': False, E + 'tags': []}, E }
tests/unit/systems/fil/test_forest.py:57: AssertionError _____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0')
def test_predict_hugectr(tmpdir): cat_dtypes = {"a": int, "b": int, "c": int} categorical_columns = ["a", "b", "c"] gdf = make_df( { "a": np.arange(64, dtype=np.int64), "b": np.arange(64, dtype=np.int64), "c": np.arange(64, dtype=np.int64), "d": np.random.rand(64).tolist(), "label": [0] * 64, }, ) gdf["label"] = gdf["label"].astype("float32") gdf["d"] = gdf["d"].astype("float32") train_dataset = nvt.Dataset(gdf) dense_columns = ["d"] dict_dtypes = {} col_schemas = train_dataset.schema.column_schemas for col in dense_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS) dict_dtypes[col] = np.float32 for col in categorical_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL) dict_dtypes[col] = np.int64 for col in ["label"]: col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET) dict_dtypes[col] = np.float32 train_path = os.path.join(tmpdir, "train/") os.mkdir(train_path) train_dataset.to_parquet( output_path=train_path, shuffle=nvt.io.Shuffle.PER_PARTITION, cats=categorical_columns, conts=dense_columns, labels=["label"], dtypes=dict_dtypes, ) embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)} total_cardinality = 0 slot_sizes = [] for column in cat_dtypes: slot_sizes.append(embeddings[column][0]) total_cardinality += embeddings[column][0] # slot sizes = list of caridinalities per column, total is sum of individual model = _run_model(slot_sizes, train_path, len(dense_columns)) model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0]) model_repository_path = os.path.join(tmpdir, "model_repository") input_schema = train_dataset.schema triton_chain = input_schema.column_names >> model_op ens = Ensemble(triton_chain, input_schema) os.makedirs(model_repository_path) enc_config, node_configs = ens.export(model_repository_path) assert enc_config assert len(node_configs) == 1 assert node_configs[0].name == "0_predicthugectr" df = train_dataset.to_ddf().compute()[:5] dense, cats, rowptr = _convert(df, 
slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path, ["OUTPUT0"], df, "ensemble_model", backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json", )
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver with run_triton_server(tmpdir, backend_config=backend_config) as client: /usr/lib/python3.8/contextlib.py:113: in enter return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository' backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager def run_triton_server(modelpath, backend_config="tensorflow,version=2"): """This function starts up a Triton server instance and returns a client to it. Parameters ---------- modelpath : string The path to the model to load. Yields ------ client: tritonclient.InferenceServerClient The client connected to the Triton server. """ cmdline = [ TRITON_SERVER_PATH, "--model-repository", modelpath, f"--backend-config={backend_config}", ] env = os.environ.copy() env["CUDA_VISIBLE_DEVICES"] = "0" with subprocess.Popen(cmdline, env=env) as process: try: with grpcclient.InferenceServerClient("localhost:8001") as client: # wait until server is ready for _ in range(60): if process.poll() is not None: retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError ----------------------------- Captured stdout call ----------------------------- HugeCTR Version: 3.7 ====================================================Model Init===================================================== [HCTR][15:39:34.401][WARNING][RK0][main]: The model name is not specified when creating the solver. [HCTR][15:39:34.401][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled. [HCTR][15:39:34.401][INFO][RK0][main]: Global seed is 511996212 [HCTR][15:39:34.444][INFO][RK0][main]: Device to NUMA mapping: GPU 0 -> node 0 [HCTR][15:39:35.021][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled. [HCTR][15:39:35.021][INFO][RK0][main]: Start all2all warmup [HCTR][15:39:35.021][INFO][RK0][main]: End all2all warmup [HCTR][15:39:35.022][INFO][RK0][main]: Using All-reduce algorithm: NCCL [HCTR][15:39:35.022][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB [HCTR][15:39:35.022][INFO][RK0][main]: num of DataReader workers: 1 [HCTR][15:39:35.023][INFO][RK0][main]: Vocabulary size: 0 [HCTR][15:39:35.023][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362 [HCTR][15:39:35.023][DEBUG][RK0][tid #139698357917440]: file_name_ /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][15:39:35.024][DEBUG][RK0][tid #139698349524736]: file_name_ /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][15:39:35.025][INFO][RK0][main]: Graph analysis to resolve tensor dependency ===================================================Model Compile=================================================== [HCTR][15:39:35.316][INFO][RK0][main]: gpu0 start to init embedding [HCTR][15:39:35.317][INFO][RK0][main]: gpu0 init embedding done [HCTR][15:39:35.318][INFO][RK0][main]: Starting AUC NCCL warm-up [HCTR][15:39:35.319][INFO][RK0][main]: Warm-up done ===================================================Model 
Summary=================================================== [HCTR][15:39:35.319][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— Layer Type Input Name Output Name Output Shape
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1BinaryCrossEntropyLoss fc2 loss
label=====================================================Model Fit===================================================== [HCTR][15:39:35.319][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20 [HCTR][15:39:35.319][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10 [HCTR][15:39:35.319][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10 [HCTR][15:39:35.319][INFO][RK0][main]: Dense network trainable: True [HCTR][15:39:35.319][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True [HCTR][15:39:35.319][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True [HCTR][15:39:35.319][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000 [HCTR][15:39:35.319][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000 [HCTR][15:39:35.319][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/file_list.txt [HCTR][15:39:35.319][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/file_list.txt [HCTR][15:39:35.324][DEBUG][RK0][tid #139698357917440]: file_name /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows 64 [HCTR][15:39:35.329][DEBUG][RK0][tid #139698357917440]: file_name_ /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][15:39:35.333][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][15:39:35.333][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][15:39:35.358][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][15:39:35.394][INFO][RK0][main]: Done [HCTR][15:39:35.413][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][15:39:35.452][INFO][RK0][main]: Done [HCTR][15:39:35.453][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][15:39:35.453][INFO][RK0][main]: Dumping dense weights to file, successful 
[HCTR][15:39:35.454][INFO][RK0][main]: Dumping dense optimizer states to file, successful [HCTR][15:39:35.459][DEBUG][RK0][tid #139698357917440]: file_name_ /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][15:39:35.461][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s. [HCTR][15:39:35.463][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully [HCTR][15:39:35.464][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][15:39:35.464][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][15:39:35.482][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][15:39:35.517][INFO][RK0][main]: Done [HCTR][15:39:35.536][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][15:39:35.574][INFO][RK0][main]: Done [HCTR][15:39:35.575][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][15:39:35.575][INFO][RK0][main]: Dumping dense weights to file, successful [HCTR][15:39:35.575][INFO][RK0][main]: Dumping dense optimizer states to file, successful [HCTR][15:39:38.463][INFO][RK0][main]: default_emb_vec_value is not specified using default: 0 [HCTR][15:39:38.464][INFO][RK0][main]: Creating HashMap CPU database backend... [HCTR][15:39:38.464][INFO][RK0][main]: Volatile DB: initial cache rate = 1 [HCTR][15:39:38.464][INFO][RK0][main]: Volatile DB: cache missed embeddings = 0 [HCTR][15:39:38.722][INFO][RK0][main]: Table: hps_et.0_hugectr.sparse_embedding1; cached 64 / 64 embeddings in volatile database (PreallocatedHashMapBackend); load: 64 / 18446744073709551615 (0.00%). [HCTR][15:39:38.722][DEBUG][RK0][main]: Real-time subscribers created! [HCTR][15:39:38.722][INFO][RK0][main]: Create embedding cache in device 0. 
[HCTR][15:39:38.723][INFO][RK0][main]: Use GPU embedding cache: True, cache size percentage: 0.500000 [HCTR][15:39:38.723][INFO][RK0][main]: Configured cache hit rate threshold: 0.900000 [HCTR][15:39:38.723][INFO][RK0][main]: The size of thread pool: 16 [HCTR][15:39:38.723][INFO][RK0][main]: The size of worker memory pool: 4 [HCTR][15:39:38.723][INFO][RK0][main]: The size of refresh memory pool: 1 [HCTR][15:39:38.739][INFO][RK0][main]: Global seed is 450741369 [HCTR][15:39:39.354][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled. [HCTR][15:39:39.354][INFO][RK0][main]: Start all2all warmup [HCTR][15:39:39.354][INFO][RK0][main]: End all2all warmup [HCTR][15:39:39.355][INFO][RK0][main]: Create inference session on device: 0 [HCTR][15:39:39.355][INFO][RK0][main]: Model name: 0_hugectr [HCTR][15:39:39.355][INFO][RK0][main]: Use mixed precision: False [HCTR][15:39:39.355][INFO][RK0][main]: Use cuda graph: True [HCTR][15:39:39.355][INFO][RK0][main]: Max batchsize: 64 [HCTR][15:39:39.355][INFO][RK0][main]: Use I64 input key: True [HCTR][15:39:39.355][INFO][RK0][main]: start create embedding for inference [HCTR][15:39:39.355][INFO][RK0][main]: sparse_input name data1 [HCTR][15:39:39.355][INFO][RK0][main]: create embedding for inference success [HCTR][15:39:39.355][INFO][RK0][main]: Inference stage skip BinaryCrossEntropyLoss layer, replaced by Sigmoid layer ----------------------------- Captured stderr call ----------------------------- I0701 15:39:35.862152 9240 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7fe326000000' with size 268435456 I0701 15:39:35.862903 9240 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864 I0701 15:39:35.866095 9240 model_repository_manager.cc:1191] loading: 0_predicthugectr:1 I0701 15:39:35.966416 9240 model_repository_manager.cc:1191] loading: 0_hugectr:1 I0701 15:39:35.971023 9240 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0) 0701 
15:39:38.049032 9280 pb_stub.cc:301] Failed to initialize Python stub: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr'
At:
(973): _find_and_load_unlocked (991): _find_and_load (1014): _gcd_import /usr/lib/python3.8/importlib/init.py(127): import_module /usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize E0701 15:39:38.437315 9240 model_repository_manager.cc:1348] failed to load '0_predicthugectr' version 1: Internal: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr'
At:
(973): _find_and_load_unlocked (991): _find_and_load (1014): _gcd_import /usr/lib/python3.8/importlib/init.py(127): import_module /usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize I0701 15:39:38.463504 9240 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr I0701 15:39:38.463531 9240 hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9 I0701 15:39:38.463540 9240 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.9 I0701 15:39:38.463547 9240 hugectr.cc:1772] The HugeCTR backend Repository location: /opt/tritonserver/backends/hugectr I0701 15:39:38.463555 9240 hugectr.cc:1781] The HugeCTR backend configuration: {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}} I0701 15:39:38.463580 9240 hugectr.cc:345] *****Parsing Parameter Server Configuration from /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json I0701 15:39:38.463631 9240 hugectr.cc:366] Support 64-bit keys = 1 I0701 15:39:38.463665 9240 hugectr.cc:591] Model name = 0_hugectr I0701 15:39:38.463673 9240 hugectr.cc:600] Model '0_hugectr' -> network file = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json I0701 15:39:38.463681 9240 hugectr.cc:607] Model '0_hugectr' -> max. 
batch size = 64 I0701 15:39:38.463687 9240 hugectr.cc:613] Model '0_hugectr' -> dense model file = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/_dense_0.model I0701 15:39:38.463698 9240 hugectr.cc:619] Model '0_hugectr' -> sparse model files = [/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_sparse_0.model] I0701 15:39:38.463705 9240 hugectr.cc:630] Model '0_hugectr' -> use GPU embedding cache = 1 I0701 15:39:38.463721 9240 hugectr.cc:639] Model '0_hugectr' -> hit rate threshold = 0.9 I0701 15:39:38.463728 9240 hugectr.cc:647] Model '0_hugectr' -> per model GPU cache = 0.5 I0701 15:39:38.463745 9240 hugectr.cc:664] Model '0_hugectr' -> use_mixed_precision = 0 I0701 15:39:38.463753 9240 hugectr.cc:671] Model '0_hugectr' -> scaler = 1 I0701 15:39:38.463759 9240 hugectr.cc:677] Model '0_hugectr' -> use_algorithm_search = 1 I0701 15:39:38.463765 9240 hugectr.cc:685] Model '0_hugectr' -> use_cuda_graph = 1 I0701 15:39:38.463772 9240 hugectr.cc:692] Model '0_hugectr' -> num. pool worker buffers = 4 I0701 15:39:38.463779 9240 hugectr.cc:700] Model '0_hugectr' -> num. 
pool refresh buffers = 1 I0701 15:39:38.463808 9240 hugectr.cc:708] Model '0_hugectr' -> cache refresh rate per iteration = 0.2 I0701 15:39:38.463817 9240 hugectr.cc:717] Model '0_hugectr' -> deployed device list = [0] I0701 15:39:38.463825 9240 hugectr.cc:725] Model '0_hugectr' -> default value for each table = [0] I0701 15:39:38.463831 9240 hugectr.cc:733] Model '0_hugectr' -> maxnum_des_feature_per_sample = 1 I0701 15:39:38.463838 9240 hugectr.cc:741] Model '0_hugectr' -> refresh_delay = 0 I0701 15:39:38.463844 9240 hugectr.cc:747] Model '0_hugectr' -> refresh_interval = 0 I0701 15:39:38.463852 9240 hugectr.cc:755] Model '0_hugectr' -> maxnum_catfeature_query_per_table_per_sample list = [3] I0701 15:39:38.463859 9240 hugectr.cc:766] Model '0_hugectr' -> embedding_vecsize_per_table list = [16] I0701 15:39:38.463867 9240 hugectr.cc:773] Model '0_hugectr' -> embedding model names = [, sparse_embedding1] I0701 15:39:38.463873 9240 hugectr.cc:780] Model '0_hugectr' -> label_dim = 1 I0701 15:39:38.463880 9240 hugectr.cc:785] Model '0_hugectr' -> the number of slots = 3 I0701 15:39:38.463889 9240 hugectr.cc:806] *****The HugeCTR Backend Parameter Server is creating... ***** I0701 15:39:38.464027 9240 hugectr.cc:814] ***** Parameter Server(Int64) is creating... ***** I0701 15:39:38.727743 9240 hugectr.cc:825] *****The HugeCTR Backend Backend created the Parameter Server successfully! 
***** I0701 15:39:38.727792 9240 hugectr.cc:1844] TRITONBACKEND_ModelInitialize: 0_hugectr (version 1) I0701 15:39:38.727799 9240 hugectr.cc:1857] Repository location: /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr I0701 15:39:38.727807 9240 hugectr.cc:1872] backend configuration in mode: {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}} I0701 15:39:38.727817 9240 hugectr.cc:1888] Parsing the latest Parameter Server json config file for deploying model 0_hugectr online I0701 15:39:38.727823 9240 hugectr.cc:1893] Hierarchical PS version is 0 and the current Model Version is 1 I0701 15:39:38.727829 9240 hugectr.cc:345] *****Parsing Parameter Server Configuration from /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json I0701 15:39:38.727872 9240 hugectr.cc:366] Support 64-bit keys = 1 I0701 15:39:38.727891 9240 hugectr.cc:591] Model name = 0_hugectr I0701 15:39:38.727899 9240 hugectr.cc:600] Model '0_hugectr' -> network file = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json I0701 15:39:38.727906 9240 hugectr.cc:607] Model '0_hugectr' -> max. 
batch size = 64 I0701 15:39:38.727912 9240 hugectr.cc:613] Model '0_hugectr' -> dense model file = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/_dense_0.model I0701 15:39:38.727920 9240 hugectr.cc:619] Model '0_hugectr' -> sparse model files = [/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_sparse_0.model] I0701 15:39:38.727926 9240 hugectr.cc:630] Model '0_hugectr' -> use GPU embedding cache = 1 I0701 15:39:38.727936 9240 hugectr.cc:639] Model '0_hugectr' -> hit rate threshold = 0.9 I0701 15:39:38.727943 9240 hugectr.cc:647] Model '0_hugectr' -> per model GPU cache = 0.5 I0701 15:39:38.727955 9240 hugectr.cc:664] Model '0_hugectr' -> use_mixed_precision = 0 I0701 15:39:38.727963 9240 hugectr.cc:671] Model '0_hugectr' -> scaler = 1 I0701 15:39:38.727969 9240 hugectr.cc:677] Model '0_hugectr' -> use_algorithm_search = 1 I0701 15:39:38.727974 9240 hugectr.cc:685] Model '0_hugectr' -> use_cuda_graph = 1 I0701 15:39:38.727981 9240 hugectr.cc:692] Model '0_hugectr' -> num. pool worker buffers = 4 I0701 15:39:38.727987 9240 hugectr.cc:700] Model '0_hugectr' -> num. 
pool refresh buffers = 1 I0701 15:39:38.727994 9240 hugectr.cc:708] Model '0_hugectr' -> cache refresh rate per iteration = 0.2 I0701 15:39:38.728002 9240 hugectr.cc:717] Model '0_hugectr' -> deployed device list = [0] I0701 15:39:38.728028 9240 hugectr.cc:725] Model '0_hugectr' -> default value for each table = [0] I0701 15:39:38.728034 9240 hugectr.cc:733] Model '0_hugectr' -> maxnum_des_feature_per_sample = 1 I0701 15:39:38.728041 9240 hugectr.cc:741] Model '0_hugectr' -> refresh_delay = 0 I0701 15:39:38.728047 9240 hugectr.cc:747] Model '0_hugectr' -> refresh_interval = 0 I0701 15:39:38.728054 9240 hugectr.cc:755] Model '0_hugectr' -> maxnum_catfeature_query_per_table_per_sample list = [3] I0701 15:39:38.728061 9240 hugectr.cc:766] Model '0_hugectr' -> embedding_vecsize_per_table list = [16] I0701 15:39:38.728068 9240 hugectr.cc:773] Model '0_hugectr' -> embedding model names = [, sparse_embedding1] I0701 15:39:38.728074 9240 hugectr.cc:780] Model '0_hugectr' -> label_dim = 1 I0701 15:39:38.728080 9240 hugectr.cc:785] Model '0_hugectr' -> the number of slots = 3 I0701 15:39:38.728462 9240 hugectr.cc:1078] Verifying model configuration: { "name": "0_hugectr", "platform": "", "backend": "hugectr", "version_policy": { "latest": { "num_versions": 1 } }, "max_batch_size": 64, "input": [ { "name": "DES", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "CATCOLUMN", "data_type": "TYPE_INT64", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "ROWINDEX", "data_type": "TYPE_INT32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false } ], "output": [ { "name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [ -1 ], "label_filename": "", "is_shape_tensor": false } ], "batch_input": [], "batch_output": [], "optimization": { "priority": 
"PRIORITY_DEFAULT", "input_pinned_memory": { "enable": true }, "output_pinned_memory": { "enable": true }, "gather_kernel_buffer_threshold": 0, "eager_batching": false }, "instance_group": [ { "name": "0_hugectr_0", "kind": "KIND_GPU", "count": 1, "gpus": [ 0 ], "secondary_devices": [], "profile": [], "passive": false, "host_policy": "" } ], "default_model_filename": "", "cc_model_filenames": {}, "metric_tags": {}, "parameters": { "cat_feature_num": { "string_value": "3" }, "config": { "string_value": "/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json" }, "label_dim": { "string_value": "1" }, "max_nnz": { "string_value": "2" }, "embedding_vector_size": { "string_value": "16" }, "gpucacheper": { "string_value": "0.5" }, "des_feature_num": { "string_value": "1" }, "slot_sizes": { "string_value": "[[64, 64, 64]]" }, "embeddingkey_long_type": { "string_value": "true" }, "gpucache": { "string_value": "true" }, "slots": { "string_value": "3" } }, "model_warmup": [] } I0701 15:39:38.728498 9240 hugectr.cc:1164] The model configuration: { "name": "0_hugectr", "platform": "", "backend": "hugectr", "version_policy": { "latest": { "num_versions": 1 } }, "max_batch_size": 64, "input": [ { "name": "DES", "data_type": "TYPE_FP32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "CATCOLUMN", "data_type": "TYPE_INT64", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false }, { "name": "ROWINDEX", "data_type": "TYPE_INT32", "format": "FORMAT_NONE", "dims": [ -1 ], "is_shape_tensor": false, "allow_ragged_batch": false, "optional": false } ], "output": [ { "name": "OUTPUT0", "data_type": "TYPE_FP32", "dims": [ -1 ], "label_filename": "", "is_shape_tensor": false } ], "batch_input": [], "batch_output": [], "optimization": { "priority": "PRIORITY_DEFAULT", "input_pinned_memory": { "enable": true }, 
"output_pinned_memory": { "enable": true }, "gather_kernel_buffer_threshold": 0, "eager_batching": false }, "instance_group": [ { "name": "0_hugectr_0", "kind": "KIND_GPU", "count": 1, "gpus": [ 0 ], "secondary_devices": [], "profile": [], "passive": false, "host_policy": "" } ], "default_model_filename": "", "cc_model_filenames": {}, "metric_tags": {}, "parameters": { "cat_feature_num": { "string_value": "3" }, "config": { "string_value": "/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json" }, "label_dim": { "string_value": "1" }, "max_nnz": { "string_value": "2" }, "embedding_vector_size": { "string_value": "16" }, "gpucacheper": { "string_value": "0.5" }, "des_feature_num": { "string_value": "1" }, "slot_sizes": { "string_value": "[[64, 64, 64]]" }, "embeddingkey_long_type": { "string_value": "true" }, "gpucache": { "string_value": "true" }, "slots": { "string_value": "3" } }, "model_warmup": [] } I0701 15:39:38.728519 9240 hugectr.cc:1209] slots set = 3 I0701 15:39:38.728525 9240 hugectr.cc:1213] slots set = 3 I0701 15:39:38.728531 9240 hugectr.cc:1221] desene number = 1 I0701 15:39:38.728538 9240 hugectr.cc:1239] The max categorical feature number = 3 I0701 15:39:38.728544 9240 hugectr.cc:1244] embedding size = 16 I0701 15:39:38.728550 9240 hugectr.cc:1250] embedding size = 16 I0701 15:39:38.728556 9240 hugectr.cc:1256] maxnnz = 2 I0701 15:39:38.728564 9240 hugectr.cc:1265] refresh_interval = 0 I0701 15:39:38.728570 9240 hugectr.cc:1273] refresh_delay = 0 I0701 15:39:38.728576 9240 hugectr.cc:1281] HugeCTR model config path = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json I0701 15:39:38.728584 9240 hugectr.cc:1329] support mixed_precision = 0 I0701 15:39:38.728593 9240 hugectr.cc:1348] gpu cache per = 0.5 I0701 15:39:38.728600 9240 hugectr.cc:1366] hit-rate threshold = 0.9 I0701 15:39:38.728606 9240 hugectr.cc:1374] Label dim = 1 I0701 15:39:38.728612 9240 
hugectr.cc:1383] support 64-bit embedding key = 1 I0701 15:39:38.728618 9240 hugectr.cc:1394] Model_Inference_Para.max_batchsize: 64 I0701 15:39:38.728631 9240 hugectr.cc:1398] max_batch_size in model config.pbtxt is 64 I0701 15:39:38.728638 9240 hugectr.cc:1468] ******Creating Embedding Cache for model 0_hugectr in device 0 I0701 15:39:38.728644 9240 hugectr.cc:1495] ******Creating Embedding Cache for model 0_hugectr successfully I0701 15:39:38.729005 9240 hugectr.cc:1996] TRITONBACKEND_ModelInstanceInitialize: 0_hugectr_0 (device 0) I0701 15:39:38.729018 9240 hugectr.cc:1637] Triton Model Instance Initialization on device 0 I0701 15:39:38.729025 9240 hugectr.cc:1647] Dense Feature buffer allocation: I0701 15:39:38.738926 9240 hugectr.cc:1654] Categorical Feature buffer allocation: I0701 15:39:38.738962 9240 hugectr.cc:1672] Categorical Row Index buffer allocation: I0701 15:39:38.738975 9240 hugectr.cc:1680] Predict result buffer allocation: I0701 15:39:38.738987 9240 hugectr.cc:2009] Loading HugeCTR Model I0701 15:39:38.738994 9240 hugectr.cc:1698] The model origin json configuration file path is: /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json I0701 15:39:39.502476 9240 hugectr.cc:1706] ******Loading HugeCTR model successfully I0701 15:39:39.502645 9240 model_repository_manager.cc:1345] successfully loaded '0_hugectr' version 1 E0701 15:39:39.502711 9240 model_repository_manager.cc:1551] Invalid argument: ensemble 'ensemble_model' depends on '0_predicthugectr' which has no loaded version I0701 15:39:39.502877 9240 server.cc:556] +------------------+------+ | Repository Agent | Path | +------------------+------+ +------------------+------+
I0701 15:39:39.503064 9240 server.cc:583] +---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Backend | Path | Config | +---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} | | hugectr | /opt/tritonserver/backends/hugectr/libtriton_hugectr.so | {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}} | +---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0701 15:39:39.503207 9240 server.cc:626] +------------------+---------+----------------------------------------------------------------------------------------------------------------------+ | Model | Version | Status | +------------------+---------+----------------------------------------------------------------------------------------------------------------------+ | 0_hugectr | 1 | READY | | 0_predicthugectr | 1 | UNAVAILABLE: Internal: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr' | | | | | | | | At: | | | |
(973): _find_and_load_unlocked | | | | (991): _find_and_load | | | | (1014): _gcd_import | | | | /usr/lib/python3.8/importlib/init.py(127): import_module | | | | /usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init | | | | /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize | +------------------+---------+----------------------------------------------------------------------------------------------------------------------+ I0701 15:39:39.536239 9240 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB I0701 15:39:39.537139 9240 tritonserver.cc:2138] +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Option | Value | +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | server_id | triton | | server_version | 2.22.0 | | server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace | | model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository | | model_control_mode | MODE_NONE | | strict_model_config | 1 | | rate_limit | OFF | | pinned_memory_pool_byte_size | 268435456 | | cuda_memory_pool_byte_size{0} | 67108864 | | response_cache_byte_size | 0 | | min_supported_compute_capability | 6.0 | | strict_readiness | 1 | | exit_timeout | 30 | 
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0701 15:39:39.537176 9240 server.cc:257] Waiting for in-flight requests to complete. I0701 15:39:39.537185 9240 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences I0701 15:39:39.537195 9240 model_repository_manager.cc:1223] unloading: 0_hugectr:1 I0701 15:39:39.537236 9240 server.cc:288] All models are stopped, unloading models I0701 15:39:39.537244 9240 server.cc:295] Timeout 30: Found 1 live models and 0 in-flight non-inference requests I0701 15:39:39.537308 9240 hugectr.cc:2026] TRITONBACKEND_ModelInstanceFinalize: delete instance state I0701 15:39:39.558124 9240 hugectr.cc:1957] TRITONBACKEND_ModelFinalize: delete model state I0701 15:39:39.559197 9240 hugectr.cc:1505] ******Destorying Embedding Cache for model 0_hugectr successfully I0701 15:39:39.559252 9240 model_repository_manager.cc:1328] successfully unloaded '0_hugectr' version 1 I0701 15:39:40.537317 9240 server.cc:295] Timeout 29: Found 0 live models and 0 in-flight non-inference requests I0701 15:39:40.537386 9240 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend W0701 15:39:40.627541 9240 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0701 15:39:40.627609 9240 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 error: creating server: Internal - failed to load all models W0701 15:39:41.628776 9240 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0701 15:39:41.628831 9240 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 =============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing fromnvtabular.framework_utils
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4] tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl] /usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute
n_features_
was deprecated in version 1.0 and will be removed in 1.2. Usen_features_in_
instead. warnings.warn(msg, category=FutureWarning)-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================== short test summary info ============================ FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti... FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run... ======= 2 failed, 47 passed, 1 skipped, 18 warnings in 168.75s (0:02:48) ======= Build step 'Execute shell' marked build as failure Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins1120361343703709016.sh
rerun tests
Click to view CI Results
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts. GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts. Running as SYSTEM Setting status of 088570474e008fa0580cb7ae6de1c4a2bceadf4e to PENDING with url https://10.20.13.93:8080/job/merlin_systems/122/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 088570474e008fa0580cb7ae6de1c4a2bceadf4e^{commit} # timeout=10 Checking out Revision 088570474e008fa0580cb7ae6de1c4a2bceadf4e (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10 Commit message: "added check for categorical columns in constructor" > git rev-list --no-walk 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins14042309132201631493.sh ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 50 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py .... [ 10%] tests/unit/systems/test_ensemble_ops.py .. [ 14%] tests/unit/systems/test_export.py . 
[ 16%] tests/unit/systems/test_graph.py . [ 18%] tests/unit/systems/test_inference_ops.py .. [ 22%] tests/unit/systems/test_op_runner.py .... [ 30%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%] tests/unit/systems/fil/test_fil.py .......................... [ 88%] tests/unit/systems/fil/test_forest.py F.. [ 94%] tests/unit/systems/hugectr/test_hugectr.py sF. [100%]
=================================== FAILURES =================================== ____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-4/test_load_from_config0')
def test_load_from_config(tmpdir): rows = 200 num_features = 16 X, y = sklearn.datasets.make_regression( n_samples=rows, n_features=num_features, n_informative=num_features // 3, random_state=0, ) model = xgboost.XGBRegressor() model.fit(X, y) feature_names = [str(i) for i in range(num_features)] input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names]) output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)]) config = PredictForest(model, input_schema).export( tmpdir, input_schema, output_schema, node_id=2 ) node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False} }
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}} E Differing items: E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}} E Full diff: E { E 'output__0': {'dtype': 'float32', E 'is_list': False, E - 'is_ragged': False}, E ? - E + 'is_ragged': False, E + 'tags': []}, E }
tests/unit/systems/fil/test_forest.py:57: AssertionError _____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0')
def test_predict_hugectr(tmpdir): cat_dtypes = {"a": int, "b": int, "c": int} categorical_columns = ["a", "b", "c"] gdf = make_df( { "a": np.arange(64, dtype=np.int64), "b": np.arange(64, dtype=np.int64), "c": np.arange(64, dtype=np.int64), "d": np.random.rand(64).tolist(), "label": [0] * 64, }, ) gdf["label"] = gdf["label"].astype("float32") gdf["d"] = gdf["d"].astype("float32") train_dataset = nvt.Dataset(gdf) dense_columns = ["d"] dict_dtypes = {} col_schemas = train_dataset.schema.column_schemas for col in dense_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS) dict_dtypes[col] = np.float32 for col in categorical_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL) dict_dtypes[col] = np.int64 for col in ["label"]: col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET) dict_dtypes[col] = np.float32 train_path = os.path.join(tmpdir, "train/") os.mkdir(train_path) train_dataset.to_parquet( output_path=train_path, shuffle=nvt.io.Shuffle.PER_PARTITION, cats=categorical_columns, conts=dense_columns, labels=["label"], dtypes=dict_dtypes, ) embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)} total_cardinality = 0 slot_sizes = [] for column in cat_dtypes: slot_sizes.append(embeddings[column][0]) total_cardinality += embeddings[column][0] # slot sizes = list of caridinalities per column, total is sum of individual model = _run_model(slot_sizes, train_path, len(dense_columns)) model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0]) model_repository_path = os.path.join(tmpdir, "model_repository") input_schema = train_dataset.schema triton_chain = input_schema.column_names >> model_op ens = Ensemble(triton_chain, input_schema) os.makedirs(model_repository_path) enc_config, node_configs = ens.export(model_repository_path) assert enc_config assert len(node_configs) == 1 assert node_configs[0].name == "0_predicthugectr" df = train_dataset.to_ddf().compute()[:5] dense, cats, rowptr = _convert(df, 
slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path, ["OUTPUT0"], df, "ensemble_model", backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json", )
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver with run_triton_server(tmpdir, backend_config=backend_config) as client: /usr/lib/python3.8/contextlib.py:113: in enter return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/model_repository' backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager def run_triton_server(modelpath, backend_config="tensorflow,version=2"): """This function starts up a Triton server instance and returns a client to it. Parameters ---------- modelpath : string The path to the model to load. Yields ------ client: tritonclient.InferenceServerClient The client connected to the Triton server. """ cmdline = [ TRITON_SERVER_PATH, "--model-repository", modelpath, f"--backend-config={backend_config}", ] env = os.environ.copy() env["CUDA_VISIBLE_DEVICES"] = "0" with subprocess.Popen(cmdline, env=env) as process: try: with grpcclient.InferenceServerClient("localhost:8001") as client: # wait until server is ready for _ in range(60): if process.poll() is not None: retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError ----------------------------- Captured stdout call ----------------------------- HugeCTR Version: 3.7 ====================================================Model Init===================================================== [HCTR][12:32:44.702][WARNING][RK0][main]: The model name is not specified when creating the solver. [HCTR][12:32:44.702][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled. [HCTR][12:32:44.702][INFO][RK0][main]: Global seed is 500188220 [HCTR][12:32:44.742][INFO][RK0][main]: Device to NUMA mapping: GPU 0 -> node 0 [HCTR][12:32:45.303][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled. [HCTR][12:32:45.303][INFO][RK0][main]: Start all2all warmup [HCTR][12:32:45.303][INFO][RK0][main]: End all2all warmup [HCTR][12:32:45.304][INFO][RK0][main]: Using All-reduce algorithm: NCCL [HCTR][12:32:45.304][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB [HCTR][12:32:45.304][INFO][RK0][main]: num of DataReader workers: 1 [HCTR][12:32:45.304][INFO][RK0][main]: Vocabulary size: 0 [HCTR][12:32:45.304][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362 [HCTR][12:32:45.305][DEBUG][RK0][tid #140647730231040]: file_name_ /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:32:45.305][DEBUG][RK0][tid #140647721838336]: file_name_ /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:32:45.306][INFO][RK0][main]: Graph analysis to resolve tensor dependency ===================================================Model Compile=================================================== [HCTR][12:32:45.599][INFO][RK0][main]: gpu0 start to init embedding [HCTR][12:32:45.600][INFO][RK0][main]: gpu0 init embedding done [HCTR][12:32:45.601][INFO][RK0][main]: Starting AUC NCCL warm-up [HCTR][12:32:45.602][INFO][RK0][main]: Warm-up done ===================================================Model 
Summary=================================================== [HCTR][12:32:45.602][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— Layer Type Input Name Output Name Output Shape
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1BinaryCrossEntropyLoss fc2 loss
label=====================================================Model Fit===================================================== [HCTR][12:32:45.602][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20 [HCTR][12:32:45.602][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10 [HCTR][12:32:45.602][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10 [HCTR][12:32:45.602][INFO][RK0][main]: Dense network trainable: True [HCTR][12:32:45.602][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True [HCTR][12:32:45.602][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True [HCTR][12:32:45.602][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000 [HCTR][12:32:45.602][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000 [HCTR][12:32:45.602][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/file_list.txt [HCTR][12:32:45.602][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/file_list.txt [HCTR][12:32:45.607][DEBUG][RK0][tid #140647730231040]: file_name /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows 64 [HCTR][12:32:45.612][DEBUG][RK0][tid #140647730231040]: file_name_ /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:32:45.627][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][12:32:45.627][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][12:32:45.645][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:32:45.680][INFO][RK0][main]: Done [HCTR][12:32:45.699][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:32:45.736][INFO][RK0][main]: Done [HCTR][12:32:45.738][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][12:32:45.738][INFO][RK0][main]: Dumping dense weights to file, successful 
[HCTR][12:32:45.738][INFO][RK0][main]: Dumping dense optimizer states to file, successful [HCTR][12:32:45.743][DEBUG][RK0][tid #140647730231040]: file_name_ /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:32:45.745][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s. [HCTR][12:32:45.747][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully [HCTR][12:32:45.748][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][12:32:45.748][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][12:32:45.766][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:32:45.800][INFO][RK0][main]: Done [HCTR][12:32:45.819][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:32:45.856][INFO][RK0][main]: Done [HCTR][12:32:45.858][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][12:32:45.858][INFO][RK0][main]: Dumping dense weights to file, successful [HCTR][12:32:45.858][INFO][RK0][main]: Dumping dense optimizer states to file, successful ----------------------------- Captured stderr call ----------------------------- I0705 12:32:46.157697 946 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f6614000000' with size 268435456 I0705 12:32:46.158435 946 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864 I0705 12:32:46.161665 946 model_repository_manager.cc:1191] loading: 0_predicthugectr:1 I0705 12:32:46.261999 946 model_repository_manager.cc:1191] loading: 0_hugectr:1 I0705 12:32:46.269000 946 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0) I0705 12:32:49.001932 946 model_repository_manager.cc:1345] successfully loaded '0_predicthugectr' version 1 I0705 12:32:49.031773 946 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr I0705 12:32:49.031802 946 hugectr.cc:1745] 
Triton TRITONBACKEND API version: 1.9 I0705 12:32:49.031811 946 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.10 I0705 12:32:49.031822 946 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend E0705 12:32:49.031832 946 model_repository_manager.cc:1348] failed to load '0_hugectr' version 1: Unsupported: Triton backend API version does not support this backend I0705 12:32:49.032114 946 model_repository_manager.cc:1191] loading: ensemble_model:1 I0705 12:32:49.153145 946 model_repository_manager.cc:1345] successfully loaded 'ensemble_model' version 1 I0705 12:32:49.153270 946 server.cc:556] +------------------+------+ | Repository Agent | Path | +------------------+------+ +------------------+------+
I0705 12:32:49.153356 946 server.cc:583] +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Backend | Path | Config | +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} | +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:32:49.153445 946 server.cc:626] +------------------+---------+------------------------------------------------------------------------------------+ | Model | Version | Status | +------------------+---------+------------------------------------------------------------------------------------+ | 0_hugectr | 1 | UNAVAILABLE: Unsupported: Triton backend API version does not support this backend | | 0_predicthugectr | 1 | READY | | ensemble_model | 1 | READY | +------------------+---------+------------------------------------------------------------------------------------+
I0705 12:32:49.216576 946 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB I0705 12:32:49.217411 946 tritonserver.cc:2138] +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Option | Value | +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | server_id | triton | | server_version | 2.22.0 | | server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace | | model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/model_repository | | model_control_mode | MODE_NONE | | strict_model_config | 1 | | rate_limit | OFF | | pinned_memory_pool_byte_size | 268435456 | | cuda_memory_pool_byte_size{0} | 67108864 | | response_cache_byte_size | 0 | | min_supported_compute_capability | 6.0 | | strict_readiness | 1 | | exit_timeout | 30 | +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:32:49.217449 946 server.cc:257] Waiting for in-flight requests to complete. I0705 12:32:49.217458 946 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences I0705 12:32:49.217464 946 model_repository_manager.cc:1223] unloading: ensemble_model:1 I0705 12:32:49.217502 946 model_repository_manager.cc:1223] unloading: 0_predicthugectr:1 I0705 12:32:49.217579 946 server.cc:288] All models are stopped, unloading models I0705 12:32:49.217589 946 server.cc:295] Timeout 30: Found 2 live models and 0 in-flight non-inference requests I0705 12:32:49.217725 946 model_repository_manager.cc:1328] successfully unloaded 'ensemble_model' version 1 I0705 12:32:50.217672 946 server.cc:295] Timeout 29: Found 1 live models and 0 in-flight non-inference requests W0705 12:32:50.237878 946 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0705 12:32:50.237934 946 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 I0705 12:32:50.611706 946 model_repository_manager.cc:1328] successfully unloaded '0_predicthugectr' version 1 I0705 12:32:51.217802 946 server.cc:295] Timeout 28: Found 0 live models and 0 in-flight non-inference requests error: creating server: Internal - failed to load all models W0705 12:32:51.238086 946 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0705 12:32:51.238136 946 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 =============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing fromnvtabular.framework_utils
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4] tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl] /usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute
n_features_
was deprecated in version 1.0 and will be removed in 1.2. Usen_features_in_
instead. warnings.warn(msg, category=FutureWarning)-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================== short test summary info ============================ FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti... FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run... ======= 2 failed, 47 passed, 1 skipped, 18 warnings in 164.00s (0:02:43) ======= Build step 'Execute shell' marked build as failure Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins18132275909526090334.sh
rerun tests
Click to view CI Results
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts. GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts. Running as SYSTEM Setting status of 088570474e008fa0580cb7ae6de1c4a2bceadf4e to PENDING with url https://10.20.13.93:8080/job/merlin_systems/123/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 088570474e008fa0580cb7ae6de1c4a2bceadf4e^{commit} # timeout=10 Checking out Revision 088570474e008fa0580cb7ae6de1c4a2bceadf4e (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10 Commit message: "added check for categorical columns in constructor" > git rev-list --no-walk 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins12995046985373788952.sh PYTHONPATH=/var/jenkins_home/workspace/merlin_systems/systems:/usr/local/hugectr/lib ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 50 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py .... 
[ 10%] tests/unit/systems/test_ensemble_ops.py .. [ 14%] tests/unit/systems/test_export.py . [ 16%] tests/unit/systems/test_graph.py . [ 18%] tests/unit/systems/test_inference_ops.py .. [ 22%] tests/unit/systems/test_op_runner.py .... [ 30%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%] tests/unit/systems/fil/test_fil.py .......................... [ 88%] tests/unit/systems/fil/test_forest.py F.. [ 94%] tests/unit/systems/hugectr/test_hugectr.py sF. [100%]
=================================== FAILURES =================================== ____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-5/test_load_from_config0')
def test_load_from_config(tmpdir): rows = 200 num_features = 16 X, y = sklearn.datasets.make_regression( n_samples=rows, n_features=num_features, n_informative=num_features // 3, random_state=0, ) model = xgboost.XGBRegressor() model.fit(X, y) feature_names = [str(i) for i in range(num_features)] input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names]) output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)]) config = PredictForest(model, input_schema).export( tmpdir, input_schema, output_schema, node_id=2 ) node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False} }
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}} E Differing items: E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}} E Full diff: E { E 'output__0': {'dtype': 'float32', E 'is_list': False, E - 'is_ragged': False}, E ? - E + 'is_ragged': False, E + 'tags': []}, E }
tests/unit/systems/fil/test_forest.py:57: AssertionError _____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0')
def test_predict_hugectr(tmpdir): cat_dtypes = {"a": int, "b": int, "c": int} categorical_columns = ["a", "b", "c"] gdf = make_df( { "a": np.arange(64, dtype=np.int64), "b": np.arange(64, dtype=np.int64), "c": np.arange(64, dtype=np.int64), "d": np.random.rand(64).tolist(), "label": [0] * 64, }, ) gdf["label"] = gdf["label"].astype("float32") gdf["d"] = gdf["d"].astype("float32") train_dataset = nvt.Dataset(gdf) dense_columns = ["d"] dict_dtypes = {} col_schemas = train_dataset.schema.column_schemas for col in dense_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS) dict_dtypes[col] = np.float32 for col in categorical_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL) dict_dtypes[col] = np.int64 for col in ["label"]: col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET) dict_dtypes[col] = np.float32 train_path = os.path.join(tmpdir, "train/") os.mkdir(train_path) train_dataset.to_parquet( output_path=train_path, shuffle=nvt.io.Shuffle.PER_PARTITION, cats=categorical_columns, conts=dense_columns, labels=["label"], dtypes=dict_dtypes, ) embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)} total_cardinality = 0 slot_sizes = [] for column in cat_dtypes: slot_sizes.append(embeddings[column][0]) total_cardinality += embeddings[column][0] # slot sizes = list of caridinalities per column, total is sum of individual model = _run_model(slot_sizes, train_path, len(dense_columns)) model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0]) model_repository_path = os.path.join(tmpdir, "model_repository") input_schema = train_dataset.schema triton_chain = input_schema.column_names >> model_op ens = Ensemble(triton_chain, input_schema) os.makedirs(model_repository_path) enc_config, node_configs = ens.export(model_repository_path) assert enc_config assert len(node_configs) == 1 assert node_configs[0].name == "0_predicthugectr" df = train_dataset.to_ddf().compute()[:5] dense, cats, rowptr = _convert(df, 
slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path, ["OUTPUT0"], df, "ensemble_model", backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json", )
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver with run_triton_server(tmpdir, backend_config=backend_config) as client: /usr/lib/python3.8/contextlib.py:113: in enter return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/model_repository' backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager def run_triton_server(modelpath, backend_config="tensorflow,version=2"): """This function starts up a Triton server instance and returns a client to it. Parameters ---------- modelpath : string The path to the model to load. Yields ------ client: tritonclient.InferenceServerClient The client connected to the Triton server. """ cmdline = [ TRITON_SERVER_PATH, "--model-repository", modelpath, f"--backend-config={backend_config}", ] env = os.environ.copy() env["CUDA_VISIBLE_DEVICES"] = "0" with subprocess.Popen(cmdline, env=env) as process: try: with grpcclient.InferenceServerClient("localhost:8001") as client: # wait until server is ready for _ in range(60): if process.poll() is not None: retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError ----------------------------- Captured stdout call ----------------------------- HugeCTR Version: 3.7 ====================================================Model Init===================================================== [HCTR][12:40:35.769][WARNING][RK0][main]: The model name is not specified when creating the solver. [HCTR][12:40:35.769][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled. [HCTR][12:40:35.769][INFO][RK0][main]: Global seed is 4255386450 [HCTR][12:40:35.815][INFO][RK0][main]: Device to NUMA mapping: GPU 0 -> node 0 [HCTR][12:40:36.376][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled. [HCTR][12:40:36.376][INFO][RK0][main]: Start all2all warmup [HCTR][12:40:36.376][INFO][RK0][main]: End all2all warmup [HCTR][12:40:36.377][INFO][RK0][main]: Using All-reduce algorithm: NCCL [HCTR][12:40:36.377][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB [HCTR][12:40:36.377][INFO][RK0][main]: num of DataReader workers: 1 [HCTR][12:40:36.378][INFO][RK0][main]: Vocabulary size: 0 [HCTR][12:40:36.378][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362 [HCTR][12:40:36.378][DEBUG][RK0][tid #139836535068416]: file_name_ /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:40:36.378][DEBUG][RK0][tid #139836526675712]: file_name_ /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:40:36.379][INFO][RK0][main]: Graph analysis to resolve tensor dependency ===================================================Model Compile=================================================== [HCTR][12:40:36.675][INFO][RK0][main]: gpu0 start to init embedding [HCTR][12:40:36.676][INFO][RK0][main]: gpu0 init embedding done [HCTR][12:40:36.677][INFO][RK0][main]: Starting AUC NCCL warm-up [HCTR][12:40:36.678][INFO][RK0][main]: Warm-up done 
===================================================Model Summary=================================================== [HCTR][12:40:36.678][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— Layer Type Input Name Output Name Output Shape
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1BinaryCrossEntropyLoss fc2 loss
label=====================================================Model Fit===================================================== [HCTR][12:40:36.678][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20 [HCTR][12:40:36.678][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10 [HCTR][12:40:36.678][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10 [HCTR][12:40:36.678][INFO][RK0][main]: Dense network trainable: True [HCTR][12:40:36.678][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True [HCTR][12:40:36.678][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True [HCTR][12:40:36.678][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000 [HCTR][12:40:36.678][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000 [HCTR][12:40:36.678][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/file_list.txt [HCTR][12:40:36.678][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/file_list.txt [HCTR][12:40:36.683][DEBUG][RK0][tid #139836535068416]: file_name /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows 64 [HCTR][12:40:36.688][DEBUG][RK0][tid #139836535068416]: file_name_ /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:40:36.692][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][12:40:36.692][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][12:40:36.717][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:40:36.752][INFO][RK0][main]: Done [HCTR][12:40:36.771][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:40:36.808][INFO][RK0][main]: Done [HCTR][12:40:36.810][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][12:40:36.810][INFO][RK0][main]: Dumping dense weights to file, successful 
[HCTR][12:40:36.810][INFO][RK0][main]: Dumping dense optimizer states to file, successful [HCTR][12:40:36.815][DEBUG][RK0][tid #139836535068416]: file_name_ /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:40:36.818][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s. [HCTR][12:40:36.819][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully [HCTR][12:40:36.820][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][12:40:36.820][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][12:40:36.838][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:40:36.872][INFO][RK0][main]: Done [HCTR][12:40:36.891][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:40:36.930][INFO][RK0][main]: Done [HCTR][12:40:36.931][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][12:40:36.931][INFO][RK0][main]: Dumping dense weights to file, successful [HCTR][12:40:36.932][INFO][RK0][main]: Dumping dense optimizer states to file, successful ----------------------------- Captured stderr call ----------------------------- I0705 12:40:37.235594 2391 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f9716000000' with size 268435456 I0705 12:40:37.236408 2391 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864 I0705 12:40:37.239737 2391 model_repository_manager.cc:1191] loading: 0_predicthugectr:1 I0705 12:40:37.340066 2391 model_repository_manager.cc:1191] loading: 0_hugectr:1 I0705 12:40:37.347009 2391 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0) I0705 12:40:40.057794 2391 model_repository_manager.cc:1345] successfully loaded '0_predicthugectr' version 1 I0705 12:40:40.091206 2391 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr I0705 12:40:40.091234 2391 
hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9 I0705 12:40:40.091243 2391 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.10 I0705 12:40:40.091254 2391 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend E0705 12:40:40.091264 2391 model_repository_manager.cc:1348] failed to load '0_hugectr' version 1: Unsupported: Triton backend API version does not support this backend I0705 12:40:40.091491 2391 model_repository_manager.cc:1191] loading: ensemble_model:1 I0705 12:40:40.211529 2391 model_repository_manager.cc:1345] successfully loaded 'ensemble_model' version 1 I0705 12:40:40.211668 2391 server.cc:556] +------------------+------+ | Repository Agent | Path | +------------------+------+ +------------------+------+
I0705 12:40:40.211777 2391 server.cc:583] +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Backend | Path | Config | +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} | +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:40:40.211871 2391 server.cc:626] +------------------+---------+------------------------------------------------------------------------------------+ | Model | Version | Status | +------------------+---------+------------------------------------------------------------------------------------+ | 0_hugectr | 1 | UNAVAILABLE: Unsupported: Triton backend API version does not support this backend | | 0_predicthugectr | 1 | READY | | ensemble_model | 1 | READY | +------------------+---------+------------------------------------------------------------------------------------+
I0705 12:40:40.275562 2391 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB I0705 12:40:40.276444 2391 tritonserver.cc:2138] +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Option | Value | +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | server_id | triton | | server_version | 2.22.0 | | server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace | | model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/model_repository | | model_control_mode | MODE_NONE | | strict_model_config | 1 | | rate_limit | OFF | | pinned_memory_pool_byte_size | 268435456 | | cuda_memory_pool_byte_size{0} | 67108864 | | response_cache_byte_size | 0 | | min_supported_compute_capability | 6.0 | | strict_readiness | 1 | | exit_timeout | 30 | +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:40:40.276483 2391 server.cc:257] Waiting for in-flight requests to complete. I0705 12:40:40.276492 2391 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences I0705 12:40:40.276498 2391 model_repository_manager.cc:1223] unloading: ensemble_model:1 I0705 12:40:40.276538 2391 model_repository_manager.cc:1223] unloading: 0_predicthugectr:1 I0705 12:40:40.276630 2391 server.cc:288] All models are stopped, unloading models I0705 12:40:40.276639 2391 server.cc:295] Timeout 30: Found 2 live models and 0 in-flight non-inference requests I0705 12:40:40.276783 2391 model_repository_manager.cc:1328] successfully unloaded 'ensemble_model' version 1 I0705 12:40:41.276723 2391 server.cc:295] Timeout 29: Found 1 live models and 0 in-flight non-inference requests W0705 12:40:41.292990 2391 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0705 12:40:41.293040 2391 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 I0705 12:40:41.691664 2391 model_repository_manager.cc:1328] successfully unloaded '0_predicthugectr' version 1 I0705 12:40:42.276872 2391 server.cc:295] Timeout 28: Found 0 live models and 0 in-flight non-inference requests error: creating server: Internal - failed to load all models W0705 12:40:42.293197 2391 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0705 12:40:42.293250 2391 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 =============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing fromnvtabular.framework_utils
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4] tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl] /usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute
n_features_
was deprecated in version 1.0 and will be removed in 1.2. Usen_features_in_
instead. warnings.warn(msg, category=FutureWarning)-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================== short test summary info ============================ FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti... FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run... ======= 2 failed, 47 passed, 1 skipped, 18 warnings in 174.24s (0:02:54) ======= Build step 'Execute shell' marked build as failure Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins8397006177805341591.sh
rerun tests
Click to view CI Results
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts. GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts. Running as SYSTEM Setting status of 088570474e008fa0580cb7ae6de1c4a2bceadf4e to PENDING with url https://10.20.13.93:8080/job/merlin_systems/124/console and message: 'Pending' Using context: Jenkins Building on master in workspace /var/jenkins_home/workspace/merlin_systems using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e > git rev-parse --is-inside-work-tree # timeout=10 Fetching changes from the remote Git repository > git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10 Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems > git --version # timeout=10 using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken > git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10 > git rev-parse 088570474e008fa0580cb7ae6de1c4a2bceadf4e^{commit} # timeout=10 Checking out Revision 088570474e008fa0580cb7ae6de1c4a2bceadf4e (detached) > git config core.sparsecheckout # timeout=10 > git checkout -f 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10 Commit message: "added check for categorical columns in constructor" > git rev-list --no-walk 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10 [merlin_systems] $ /bin/bash /tmp/jenkins4914296926181841090.sh PYTHONPATH=/usr/local/hugectr/lib:/var/jenkins_home/workspace/merlin_systems/systems ============================= test session starts ============================== platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0 rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0 collected 50 itemstests/unit/test_version.py . [ 2%] tests/unit/systems/test_ensemble.py .... 
[ 10%] tests/unit/systems/test_ensemble_ops.py .. [ 14%] tests/unit/systems/test_export.py . [ 16%] tests/unit/systems/test_graph.py . [ 18%] tests/unit/systems/test_inference_ops.py .. [ 22%] tests/unit/systems/test_op_runner.py .... [ 30%] tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%] tests/unit/systems/fil/test_fil.py .......................... [ 88%] tests/unit/systems/fil/test_forest.py F.. [ 94%] tests/unit/systems/hugectr/test_hugectr.py sF. [100%]
=================================== FAILURES =================================== ____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-6/test_load_from_config0')
def test_load_from_config(tmpdir): rows = 200 num_features = 16 X, y = sklearn.datasets.make_regression( n_samples=rows, n_features=num_features, n_informative=num_features // 3, random_state=0, ) model = xgboost.XGBRegressor() model.fit(X, y) feature_names = [str(i) for i in range(num_features)] input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names]) output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)]) config = PredictForest(model, input_schema).export( tmpdir, input_schema, output_schema, node_id=2 ) node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False} }
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}} E Differing items: E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}} E Full diff: E { E 'output__0': {'dtype': 'float32', E 'is_list': False, E - 'is_ragged': False}, E ? - E + 'is_ragged': False, E + 'tags': []}, E }
tests/unit/systems/fil/test_forest.py:57: AssertionError _____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0')
def test_predict_hugectr(tmpdir): cat_dtypes = {"a": int, "b": int, "c": int} categorical_columns = ["a", "b", "c"] gdf = make_df( { "a": np.arange(64, dtype=np.int64), "b": np.arange(64, dtype=np.int64), "c": np.arange(64, dtype=np.int64), "d": np.random.rand(64).tolist(), "label": [0] * 64, }, ) gdf["label"] = gdf["label"].astype("float32") gdf["d"] = gdf["d"].astype("float32") train_dataset = nvt.Dataset(gdf) dense_columns = ["d"] dict_dtypes = {} col_schemas = train_dataset.schema.column_schemas for col in dense_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS) dict_dtypes[col] = np.float32 for col in categorical_columns: col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL) dict_dtypes[col] = np.int64 for col in ["label"]: col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET) dict_dtypes[col] = np.float32 train_path = os.path.join(tmpdir, "train/") os.mkdir(train_path) train_dataset.to_parquet( output_path=train_path, shuffle=nvt.io.Shuffle.PER_PARTITION, cats=categorical_columns, conts=dense_columns, labels=["label"], dtypes=dict_dtypes, ) embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)} total_cardinality = 0 slot_sizes = [] for column in cat_dtypes: slot_sizes.append(embeddings[column][0]) total_cardinality += embeddings[column][0] # slot sizes = list of caridinalities per column, total is sum of individual model = _run_model(slot_sizes, train_path, len(dense_columns)) model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0]) model_repository_path = os.path.join(tmpdir, "model_repository") input_schema = train_dataset.schema triton_chain = input_schema.column_names >> model_op ens = Ensemble(triton_chain, input_schema) os.makedirs(model_repository_path) enc_config, node_configs = ens.export(model_repository_path) assert enc_config assert len(node_configs) == 1 assert node_configs[0].name == "0_predicthugectr" df = train_dataset.to_ddf().compute()[:5] dense, cats, rowptr = _convert(df, 
slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path, ["OUTPUT0"], df, "ensemble_model", backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json", )
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver with run_triton_server(tmpdir, backend_config=backend_config) as client: /usr/lib/python3.8/contextlib.py:113: in enter return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/model_repository' backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager def run_triton_server(modelpath, backend_config="tensorflow,version=2"): """This function starts up a Triton server instance and returns a client to it. Parameters ---------- modelpath : string The path to the model to load. Yields ------ client: tritonclient.InferenceServerClient The client connected to the Triton server. """ cmdline = [ TRITON_SERVER_PATH, "--model-repository", modelpath, f"--backend-config={backend_config}", ] env = os.environ.copy() env["CUDA_VISIBLE_DEVICES"] = "0" with subprocess.Popen(cmdline, env=env) as process: try: with grpcclient.InferenceServerClient("localhost:8001") as client: # wait until server is ready for _ in range(60): if process.poll() is not None: retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError ----------------------------- Captured stdout call ----------------------------- HugeCTR Version: 3.7 ====================================================Model Init===================================================== [HCTR][12:43:58.864][WARNING][RK0][main]: The model name is not specified when creating the solver. [HCTR][12:43:58.864][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled. [HCTR][12:43:58.864][INFO][RK0][main]: Global seed is 832437304 [HCTR][12:43:58.903][INFO][RK0][main]: Device to NUMA mapping: GPU 0 -> node 0 [HCTR][12:43:59.471][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled. [HCTR][12:43:59.471][INFO][RK0][main]: Start all2all warmup [HCTR][12:43:59.471][INFO][RK0][main]: End all2all warmup [HCTR][12:43:59.472][INFO][RK0][main]: Using All-reduce algorithm: NCCL [HCTR][12:43:59.472][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB [HCTR][12:43:59.472][INFO][RK0][main]: num of DataReader workers: 1 [HCTR][12:43:59.473][INFO][RK0][main]: Vocabulary size: 0 [HCTR][12:43:59.473][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362 [HCTR][12:43:59.473][DEBUG][RK0][tid #140066899773184]: file_name_ /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:43:59.473][DEBUG][RK0][tid #140066891380480]: file_name_ /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:43:59.475][INFO][RK0][main]: Graph analysis to resolve tensor dependency ===================================================Model Compile=================================================== [HCTR][12:43:59.771][INFO][RK0][main]: gpu0 start to init embedding [HCTR][12:43:59.772][INFO][RK0][main]: gpu0 init embedding done [HCTR][12:43:59.773][INFO][RK0][main]: Starting AUC NCCL warm-up [HCTR][12:43:59.773][INFO][RK0][main]: Warm-up done ===================================================Model 
Summary=================================================== [HCTR][12:43:59.773][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— Layer Type Input Name Output Name Output Shape
—————————————————————————————————————————————————————————————————————————————————————————————————————————————————— DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1BinaryCrossEntropyLoss fc2 loss
label=====================================================Model Fit===================================================== [HCTR][12:43:59.774][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20 [HCTR][12:43:59.774][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10 [HCTR][12:43:59.774][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10 [HCTR][12:43:59.774][INFO][RK0][main]: Dense network trainable: True [HCTR][12:43:59.774][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True [HCTR][12:43:59.774][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True [HCTR][12:43:59.774][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000 [HCTR][12:43:59.774][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000 [HCTR][12:43:59.774][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/file_list.txt [HCTR][12:43:59.774][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/file_list.txt [HCTR][12:43:59.779][DEBUG][RK0][tid #140066899773184]: file_name /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows 64 [HCTR][12:43:59.783][DEBUG][RK0][tid #140066899773184]: file_name_ /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:43:59.787][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][12:43:59.788][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][12:43:59.813][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:43:59.848][INFO][RK0][main]: Done [HCTR][12:43:59.867][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:43:59.904][INFO][RK0][main]: Done [HCTR][12:43:59.906][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][12:43:59.906][INFO][RK0][main]: Dumping dense weights to file, successful 
[HCTR][12:43:59.906][INFO][RK0][main]: Dumping dense optimizer states to file, successful [HCTR][12:43:59.911][DEBUG][RK0][tid #140066899773184]: file_name_ /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64 [HCTR][12:43:59.913][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s. [HCTR][12:43:59.915][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully [HCTR][12:43:59.916][INFO][RK0][main]: Rank0: Write hash table to file [HCTR][12:43:59.916][INFO][RK0][main]: Dumping sparse weights to files, successful [HCTR][12:43:59.933][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:43:59.969][INFO][RK0][main]: Done [HCTR][12:43:59.987][INFO][RK0][main]: Rank0: Write optimzer state to file [HCTR][12:44:00.024][INFO][RK0][main]: Done [HCTR][12:44:00.026][INFO][RK0][main]: Dumping sparse optimzer states to files, successful [HCTR][12:44:00.026][INFO][RK0][main]: Dumping dense weights to file, successful [HCTR][12:44:00.026][INFO][RK0][main]: Dumping dense optimizer states to file, successful ----------------------------- Captured stderr call ----------------------------- I0705 12:44:00.316030 3734 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7fdd24000000' with size 268435456 I0705 12:44:00.316781 3734 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864 I0705 12:44:00.319964 3734 model_repository_manager.cc:1191] loading: 0_predicthugectr:1 I0705 12:44:00.420272 3734 model_repository_manager.cc:1191] loading: 0_hugectr:1 I0705 12:44:00.424791 3734 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0) I0705 12:44:03.110730 3734 model_repository_manager.cc:1345] successfully loaded '0_predicthugectr' version 1 I0705 12:44:03.144641 3734 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr I0705 12:44:03.144673 3734 
hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9 I0705 12:44:03.144685 3734 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.10 I0705 12:44:03.144699 3734 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend E0705 12:44:03.144716 3734 model_repository_manager.cc:1348] failed to load '0_hugectr' version 1: Unsupported: Triton backend API version does not support this backend I0705 12:44:03.145020 3734 model_repository_manager.cc:1191] loading: ensemble_model:1 I0705 12:44:03.272356 3734 model_repository_manager.cc:1345] successfully loaded 'ensemble_model' version 1 I0705 12:44:03.272526 3734 server.cc:556] +------------------+------+ | Repository Agent | Path | +------------------+------+ +------------------+------+
I0705 12:44:03.272627 3734 server.cc:583] +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Backend | Path | Config | +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+ | python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} | +---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:44:03.272750 3734 server.cc:626] +------------------+---------+------------------------------------------------------------------------------------+ | Model | Version | Status | +------------------+---------+------------------------------------------------------------------------------------+ | 0_hugectr | 1 | UNAVAILABLE: Unsupported: Triton backend API version does not support this backend | | 0_predicthugectr | 1 | READY | | ensemble_model | 1 | READY | +------------------+---------+------------------------------------------------------------------------------------+
I0705 12:44:03.338094 3734 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB I0705 12:44:03.338985 3734 tritonserver.cc:2138] +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Option | Value | +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | server_id | triton | | server_version | 2.22.0 | | server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace | | model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/model_repository | | model_control_mode | MODE_NONE | | strict_model_config | 1 | | rate_limit | OFF | | pinned_memory_pool_byte_size | 268435456 | | cuda_memory_pool_byte_size{0} | 67108864 | | response_cache_byte_size | 0 | | min_supported_compute_capability | 6.0 | | strict_readiness | 1 | | exit_timeout | 30 | +----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:44:03.339022 3734 server.cc:257] Waiting for in-flight requests to complete. I0705 12:44:03.339032 3734 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences I0705 12:44:03.339041 3734 model_repository_manager.cc:1223] unloading: ensemble_model:1 I0705 12:44:03.339080 3734 model_repository_manager.cc:1223] unloading: 0_predicthugectr:1 I0705 12:44:03.339154 3734 server.cc:288] All models are stopped, unloading models I0705 12:44:03.339164 3734 server.cc:295] Timeout 30: Found 2 live models and 0 in-flight non-inference requests I0705 12:44:03.339264 3734 model_repository_manager.cc:1328] successfully unloaded 'ensemble_model' version 1 I0705 12:44:04.339254 3734 server.cc:295] Timeout 29: Found 1 live models and 0 in-flight non-inference requests W0705 12:44:04.356687 3734 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0705 12:44:04.356746 3734 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 I0705 12:44:04.765274 3734 model_repository_manager.cc:1328] successfully unloaded '0_predicthugectr' version 1 I0705 12:44:05.339401 3734 server.cc:295] Timeout 28: Found 0 live models and 0 in-flight non-inference requests error: creating server: Internal - failed to load all models W0705 12:44:05.356913 3734 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0 W0705 12:44:05.356961 3734 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0 =============================== warnings summary =============================== ../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18 /var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The
nvtabular.framework_utils
module is being replaced by the Merlin Models library. Support for importing from `nvtabular.framework_utils`
is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead. warnings.warn(tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet] tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet] tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet] tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet] tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet] /usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility. warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet] /var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4] tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4] tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl] /usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute
n_features_
was deprecated in version 1.0 and will be removed in 1.2. Use `n_features_in_`
instead. warnings.warn(msg, category=FutureWarning)-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html =========================== short test summary info ============================ FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti... FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run... ======= 2 failed, 47 passed, 1 skipped, 18 warnings in 172.43s (0:02:52) ======= Build step 'Execute shell' marked build as failure Performing Post build task... Match found for : : True Logical operation result is TRUE Running script : #!/bin/bash cd /var/jenkins_home/ CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log" [merlin_systems] $ /bin/bash /tmp/jenkins1025052810390896047.sh