systems
systems copied to clipboard
DAG (wrapper) operator for HugeCTR serving support
This PR lays the foundation for HugeCTR support in Merlin Systems. It creates a wrapper operator that houses a HugeCTR model, allowing the model to be used within a Merlin graph for inference operations.
Click to view CI Results
GitHub pull request #125 of commit 8986c8a491173b051732e8d54adbbcd04cca1454, no merge conflicts.
Running as SYSTEM
Setting status of 8986c8a491173b051732e8d54adbbcd04cca1454 to PENDING with url https://10.20.13.93:8080/job/merlin_systems/100/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 8986c8a491173b051732e8d54adbbcd04cca1454^{commit} # timeout=10
Checking out Revision 8986c8a491173b051732e8d54adbbcd04cca1454 (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 8986c8a491173b051732e8d54adbbcd04cca1454 # timeout=10
Commit message: "add foundation of hugectr op"
> git rev-list --no-walk fc4e464729df3bd367bb990310b5f2119af35a46 # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins10386077036665179152.sh
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 18 items / 2 skipped
tests/unit/test_version.py . [ 5%]
tests/unit/systems/test_ensemble.py ... [ 22%]
tests/unit/systems/test_ensemble_ops.py .. [ 33%]
tests/unit/systems/test_export.py . [ 38%]
tests/unit/systems/test_graph.py . [ 44%]
tests/unit/systems/test_inference_ops.py .. [ 55%]
tests/unit/systems/test_op_runner.py .... [ 77%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 94%]
tests/unit/systems/hugectr/test_hugectr.py F [100%]
=================================== FAILURES ===================================
________________________________ test_training _________________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-0/test_training0')
def test_training(tmpdir):
# Download & Convert data
download_file(
"http://files.grouplens.org/datasets/movielens/ml-25m.zip",
os.path.join(DATA_DIR, "ml-25m.zip"),
)
ratings = cudf.read_csv(os.path.join(DATA_DIR, "ml-25m", "ratings.csv"))
ratings["new_cat1"] = ratings["userId"] / ratings["movieId"]
ratings["new_cat1"] = ratings["new_cat1"].astype("int64")
ratings.head()
ratings = ratings.drop("timestamp", axis=1)
train, valid = train_test_split(ratings, test_size=0.2, random_state=42)
train.to_parquet(DATA_DIR + "train.parquet")
valid.to_parquet(DATA_DIR + "valid.parquet")
del train
del valid
gc.collect()
# Perform ETL with NVTabular
cat_features = CATEGORICAL_COLUMNS >> nvt.ops.Categorify(cat_cache="device")
ratings = nvt.ColumnSelector(["rating"]) >> nvt.ops.LambdaOp(
lambda col: (col > 3).astype("int8")
)
output = cat_features + ratings
workflow = nvt.Workflow(output)
train_dataset = nvt.Dataset(DATA_DIR + "train.parquet", part_size="100MB")
valid_dataset = nvt.Dataset(DATA_DIR + "valid.parquet", part_size="100MB")
workflow.fit(train_dataset)
dict_dtypes = {}
for col in CATEGORICAL_COLUMNS:
dict_dtypes[col] = np.int64
for col in LABEL_COLUMNS:
dict_dtypes[col] = np.float32
if path.exists(DATA_DIR + "train"):
shutil.rmtree(os.path.join(DATA_DIR, "train"))
if path.exists(DATA_DIR + "valid"):
shutil.rmtree(os.path.join(DATA_DIR, "valid"))
workflow.transform(train_dataset).to_parquet(
output_path=DATA_DIR + "train/",
shuffle=nvt.io.Shuffle.PER_PARTITION,
cats=CATEGORICAL_COLUMNS,
labels=LABEL_COLUMNS,
dtypes=dict_dtypes,
)
workflow.transform(valid_dataset).to_parquet(
output_path=DATA_DIR + "valid/",
shuffle=False,
cats=CATEGORICAL_COLUMNS,
labels=LABEL_COLUMNS,
dtypes=dict_dtypes,
)
# Train with HugeCTR
embeddings = get_embedding_sizes(workflow)
total_cardinality = 0
slot_sizes = []
for column in CATEGORICAL_COLUMNS:
slot_sizes.append(embeddings[column][0])
total_cardinality += embeddings[column][0]
test_data_path = DATA_DIR + "test/"
if path.exists(test_data_path):
shutil.rmtree(test_data_path)
os.mkdir(test_data_path)
if path.exists(MODEL_DIR):
shutil.rmtree(MODEL_DIR)
os.makedirs(TRAIN_DIR)
sample_data = cudf.read_parquet(DATA_DIR + "valid.parquet", num_rows=TEST_N_ROWS)
sample_data.to_csv(test_data_path + "data.csv")
sample_data_trans = nvt.workflow.workflow._transform_partition(
sample_data, [workflow.output_node]
)
dense_features, embedding_columns, row_ptrs = _convert(sample_data_trans, slot_sizes)
model = _run_model(slot_sizes, total_cardinality)
tests/unit/systems/hugectr/test_hugectr.py:306:
slot_sizes = [162542, 56586, 76204], total_cardinality = 295332
def _run_model(slot_sizes, total_cardinality):
solver = hugectr.CreateSolver(
vvgpu=[[0]],
batchsize=2048,
batchsize_eval=2048,
max_eval_batches=160,
i64_input_key=True,
use_mixed_precision=False,
repeat_dataset=True,
)
E AttributeError: 'NoneType' object has no attribute 'CreateSolver'
tests/unit/systems/hugectr/test_hugectr.py:65: AttributeError
----------------------------- Captured stderr call -----------------------------
downloading ml-25m.zip: 0.00B [00:00, ?B/s]
downloading ml-25m.zip: 0%| | 0.00/262M [00:00<?, ?B/s]
downloading ml-25m.zip: 0%| | 57.3k/262M [00:00<10:11, 428kB/s]
downloading ml-25m.zip: 0%| | 197k/262M [00:00<05:06, 854kB/s]
downloading ml-25m.zip: 0%| | 410k/262M [00:00<03:22, 1.29MB/s]
downloading ml-25m.zip: 0%| | 647k/262M [00:00<02:45, 1.58MB/s]
downloading ml-25m.zip: 0%| | 893k/262M [00:00<02:31, 1.72MB/s]
downloading ml-25m.zip: 0%| | 1.16M/262M [00:00<02:16, 1.90MB/s]
downloading ml-25m.zip: 1%| | 1.45M/262M [00:00<02:03, 2.11MB/s]
downloading ml-25m.zip: 1%| | 1.77M/262M [00:00<01:53, 2.28MB/s]
downloading ml-25m.zip: 1%| | 2.11M/262M [00:01<01:44, 2.48MB/s]
downloading ml-25m.zip: 1%| | 2.47M/262M [00:01<01:36, 2.68MB/s]
downloading ml-25m.zip: 1%| | 2.87M/262M [00:01<01:28, 2.93MB/s]
downloading ml-25m.zip: 1%|▏ | 3.28M/262M [00:01<01:24, 3.07MB/s]
downloading ml-25m.zip: 1%|▏ | 3.74M/262M [00:01<01:18, 3.29MB/s]
downloading ml-25m.zip: 2%|▏ | 4.24M/262M [00:01<01:12, 3.57MB/s]
downloading ml-25m.zip: 2%|▏ | 4.77M/262M [00:01<01:06, 3.89MB/s]
downloading ml-25m.zip: 2%|▏ | 5.33M/262M [00:01<01:01, 4.20MB/s]
downloading ml-25m.zip: 2%|▏ | 5.92M/262M [00:02<00:57, 4.49MB/s]
downloading ml-25m.zip: 2%|▏ | 6.47M/262M [00:02<00:53, 4.75MB/s]
downloading ml-25m.zip: 3%|▎ | 6.96M/262M [00:02<00:55, 4.62MB/s]
downloading ml-25m.zip: 3%|▎ | 7.42M/262M [00:02<00:55, 4.63MB/s]
downloading ml-25m.zip: 3%|▎ | 7.99M/262M [00:02<00:53, 4.73MB/s]
downloading ml-25m.zip: 3%|▎ | 8.58M/262M [00:02<00:50, 5.04MB/s]
downloading ml-25m.zip: 3%|▎ | 9.08M/262M [00:02<00:52, 4.85MB/s]
downloading ml-25m.zip: 4%|▎ | 9.58M/262M [00:02<00:55, 4.52MB/s]
downloading ml-25m.zip: 4%|▍ | 10.1M/262M [00:02<00:53, 4.74MB/s]
downloading ml-25m.zip: 4%|▍ | 10.7M/262M [00:02<00:50, 4.95MB/s]
downloading ml-25m.zip: 4%|▍ | 11.2M/262M [00:03<00:53, 4.69MB/s]
downloading ml-25m.zip: 4%|▍ | 11.7M/262M [00:03<00:52, 4.80MB/s]
downloading ml-25m.zip: 5%|▍ | 12.3M/262M [00:03<00:49, 5.03MB/s]
downloading ml-25m.zip: 5%|▍ | 12.8M/262M [00:03<00:49, 5.03MB/s]
downloading ml-25m.zip: 5%|▌ | 13.3M/262M [00:03<00:55, 4.50MB/s]
downloading ml-25m.zip: 5%|▌ | 13.8M/262M [00:03<00:59, 4.14MB/s]
downloading ml-25m.zip: 5%|▌ | 14.2M/262M [00:03<00:57, 4.27MB/s]
downloading ml-25m.zip: 6%|▌ | 14.7M/262M [00:03<00:58, 4.21MB/s]
downloading ml-25m.zip: 6%|▌ | 15.1M/262M [00:04<01:02, 3.95MB/s]
downloading ml-25m.zip: 6%|▌ | 15.5M/262M [00:04<01:05, 3.79MB/s]
downloading ml-25m.zip: 6%|▌ | 15.9M/262M [00:04<01:04, 3.83MB/s]
downloading ml-25m.zip: 6%|▋ | 16.4M/262M [00:04<01:02, 3.95MB/s]
downloading ml-25m.zip: 6%|▋ | 16.9M/262M [00:04<01:00, 4.05MB/s]
downloading ml-25m.zip: 7%|▋ | 17.4M/262M [00:04<00:58, 4.18MB/s]
downloading ml-25m.zip: 7%|▋ | 17.9M/262M [00:04<00:56, 4.31MB/s]
downloading ml-25m.zip: 7%|▋ | 18.4M/262M [00:04<00:55, 4.38MB/s]
downloading ml-25m.zip: 7%|▋ | 19.0M/262M [00:04<00:54, 4.48MB/s]
downloading ml-25m.zip: 7%|▋ | 19.5M/262M [00:05<00:52, 4.64MB/s]
downloading ml-25m.zip: 8%|▊ | 19.9M/262M [00:05<00:53, 4.54MB/s]
downloading ml-25m.zip: 8%|▊ | 20.4M/262M [00:05<00:52, 4.57MB/s]
downloading ml-25m.zip: 8%|▊ | 20.9M/262M [00:05<00:51, 4.64MB/s]
downloading ml-25m.zip: 8%|▊ | 21.4M/262M [00:05<00:50, 4.75MB/s]
downloading ml-25m.zip: 8%|▊ | 21.9M/262M [00:05<00:50, 4.73MB/s]
downloading ml-25m.zip: 9%|▊ | 22.4M/262M [00:05<00:50, 4.70MB/s]
downloading ml-25m.zip: 9%|▉ | 22.9M/262M [00:05<00:49, 4.80MB/s]
downloading ml-25m.zip: 9%|▉ | 23.5M/262M [00:05<00:48, 4.89MB/s]
downloading ml-25m.zip: 9%|▉ | 24.1M/262M [00:05<00:46, 5.13MB/s]
downloading ml-25m.zip: 9%|▉ | 24.6M/262M [00:06<00:45, 5.17MB/s]
downloading ml-25m.zip: 10%|▉ | 25.2M/262M [00:06<00:46, 5.15MB/s]
downloading ml-25m.zip: 10%|▉ | 25.7M/262M [00:06<00:47, 5.00MB/s]
downloading ml-25m.zip: 10%|█ | 26.2M/262M [00:06<00:47, 5.00MB/s]
downloading ml-25m.zip: 10%|█ | 26.8M/262M [00:06<00:45, 5.14MB/s]
downloading ml-25m.zip: 10%|█ | 27.3M/262M [00:06<00:45, 5.15MB/s]
downloading ml-25m.zip: 11%|█ | 27.9M/262M [00:06<00:44, 5.29MB/s]
downloading ml-25m.zip: 11%|█ | 28.5M/262M [00:06<00:43, 5.32MB/s]
downloading ml-25m.zip: 11%|█ | 29.0M/262M [00:06<00:45, 5.10MB/s]
downloading ml-25m.zip: 11%|█▏ | 29.5M/262M [00:07<00:45, 5.07MB/s]
downloading ml-25m.zip: 11%|█▏ | 30.1M/262M [00:07<00:44, 5.16MB/s]
downloading ml-25m.zip: 12%|█▏ | 30.6M/262M [00:07<00:46, 5.00MB/s]
downloading ml-25m.zip: 12%|█▏ | 31.1M/262M [00:07<00:47, 4.90MB/s]
downloading ml-25m.zip: 12%|█▏ | 31.6M/262M [00:07<00:46, 4.96MB/s]
downloading ml-25m.zip: 12%|█▏ | 32.1M/262M [00:07<00:47, 4.88MB/s]
downloading ml-25m.zip: 12%|█▏ | 32.6M/262M [00:07<00:46, 4.90MB/s]
downloading ml-25m.zip: 13%|█▎ | 33.2M/262M [00:07<00:45, 4.99MB/s]
downloading ml-25m.zip: 13%|█▎ | 33.9M/262M [00:07<00:44, 5.10MB/s]
downloading ml-25m.zip: 13%|█▎ | 34.5M/262M [00:08<00:43, 5.21MB/s]
downloading ml-25m.zip: 13%|█▎ | 35.1M/262M [00:08<00:42, 5.38MB/s]
downloading ml-25m.zip: 14%|█▎ | 35.6M/262M [00:08<00:43, 5.24MB/s]
downloading ml-25m.zip: 14%|█▍ | 36.2M/262M [00:08<00:45, 4.95MB/s]
downloading ml-25m.zip: 14%|█▍ | 36.7M/262M [00:08<00:45, 4.98MB/s]
downloading ml-25m.zip: 14%|█▍ | 37.3M/262M [00:08<00:42, 5.25MB/s]
downloading ml-25m.zip: 14%|█▍ | 37.9M/262M [00:08<00:42, 5.28MB/s]
downloading ml-25m.zip: 15%|█▍ | 38.4M/262M [00:08<00:42, 5.24MB/s]
downloading ml-25m.zip: 15%|█▍ | 39.0M/262M [00:08<00:41, 5.34MB/s]
downloading ml-25m.zip: 15%|█▌ | 39.5M/262M [00:08<00:42, 5.23MB/s]
downloading ml-25m.zip: 15%|█▌ | 40.1M/262M [00:09<00:42, 5.22MB/s]
downloading ml-25m.zip: 16%|█▌ | 40.8M/262M [00:09<00:41, 5.32MB/s]
downloading ml-25m.zip: 16%|█▌ | 41.4M/262M [00:09<00:41, 5.36MB/s]
downloading ml-25m.zip: 16%|█▌ | 42.0M/262M [00:09<00:40, 5.38MB/s]
downloading ml-25m.zip: 16%|█▋ | 42.6M/262M [00:09<00:40, 5.41MB/s]
downloading ml-25m.zip: 17%|█▋ | 43.3M/262M [00:09<00:40, 5.44MB/s]
downloading ml-25m.zip: 17%|█▋ | 43.8M/262M [00:09<00:39, 5.46MB/s]
downloading ml-25m.zip: 17%|█▋ | 44.4M/262M [00:09<00:40, 5.38MB/s]
downloading ml-25m.zip: 17%|█▋ | 44.9M/262M [00:09<00:40, 5.35MB/s]
downloading ml-25m.zip: 17%|█▋ | 45.5M/262M [00:10<00:40, 5.32MB/s]
downloading ml-25m.zip: 18%|█▊ | 46.1M/262M [00:10<00:40, 5.36MB/s]
downloading ml-25m.zip: 18%|█▊ | 46.7M/262M [00:10<00:38, 5.53MB/s]
downloading ml-25m.zip: 18%|█▊ | 47.3M/262M [00:10<00:38, 5.53MB/s]
downloading ml-25m.zip: 18%|█▊ | 47.9M/262M [00:10<00:39, 5.37MB/s]
downloading ml-25m.zip: 18%|█▊ | 48.4M/262M [00:10<00:40, 5.29MB/s]
downloading ml-25m.zip: 19%|█▊ | 49.0M/262M [00:10<00:38, 5.49MB/s]
downloading ml-25m.zip: 19%|█▉ | 49.6M/262M [00:10<00:38, 5.48MB/s]
downloading ml-25m.zip: 19%|█▉ | 50.2M/262M [00:10<00:38, 5.53MB/s]
downloading ml-25m.zip: 19%|█▉ | 50.7M/262M [00:11<00:38, 5.47MB/s]
downloading ml-25m.zip: 20%|█▉ | 51.3M/262M [00:11<00:39, 5.39MB/s]
downloading ml-25m.zip: 20%|█▉ | 51.9M/262M [00:11<00:39, 5.37MB/s]
downloading ml-25m.zip: 20%|██ | 52.5M/262M [00:11<00:37, 5.56MB/s]
downloading ml-25m.zip: 20%|██ | 53.0M/262M [00:11<00:37, 5.54MB/s]
downloading ml-25m.zip: 20%|██ | 53.6M/262M [00:11<00:38, 5.39MB/s]
downloading ml-25m.zip: 21%|██ | 54.1M/262M [00:11<00:38, 5.39MB/s]
downloading ml-25m.zip: 21%|██ | 54.7M/262M [00:11<00:39, 5.31MB/s]
downloading ml-25m.zip: 21%|██ | 55.4M/262M [00:11<00:38, 5.41MB/s]
downloading ml-25m.zip: 21%|██▏ | 56.0M/262M [00:12<00:37, 5.50MB/s]
downloading ml-25m.zip: 22%|██▏ | 56.7M/262M [00:12<00:37, 5.54MB/s]
downloading ml-25m.zip: 22%|██▏ | 57.3M/262M [00:12<00:35, 5.69MB/s]
downloading ml-25m.zip: 22%|██▏ | 57.9M/262M [00:12<00:36, 5.65MB/s]
downloading ml-25m.zip: 22%|██▏ | 58.4M/262M [00:12<00:36, 5.63MB/s]
downloading ml-25m.zip: 23%|██▎ | 59.0M/262M [00:12<00:36, 5.51MB/s]
downloading ml-25m.zip: 23%|██▎ | 59.6M/262M [00:12<00:36, 5.61MB/s]
downloading ml-25m.zip: 23%|██▎ | 60.2M/262M [00:12<00:35, 5.73MB/s]
downloading ml-25m.zip: 23%|██▎ | 60.8M/262M [00:12<00:35, 5.65MB/s]
downloading ml-25m.zip: 23%|██▎ | 61.3M/262M [00:12<00:35, 5.66MB/s]
downloading ml-25m.zip: 24%|██▎ | 62.0M/262M [00:13<00:34, 5.72MB/s]
downloading ml-25m.zip: 24%|██▍ | 62.6M/262M [00:13<00:33, 5.88MB/s]
downloading ml-25m.zip: 24%|██▍ | 63.2M/262M [00:13<00:33, 5.88MB/s]
downloading ml-25m.zip: 24%|██▍ | 63.8M/262M [00:13<00:33, 5.87MB/s]
downloading ml-25m.zip: 25%|██▍ | 64.4M/262M [00:13<00:33, 5.86MB/s]
downloading ml-25m.zip: 25%|██▍ | 65.0M/262M [00:13<00:33, 5.85MB/s]
downloading ml-25m.zip: 25%|██▌ | 65.6M/262M [00:13<00:33, 5.86MB/s]
downloading ml-25m.zip: 25%|██▌ | 66.2M/262M [00:13<00:34, 5.73MB/s]
downloading ml-25m.zip: 25%|██▌ | 66.8M/262M [00:13<00:38, 5.13MB/s]
downloading ml-25m.zip: 26%|██▌ | 67.3M/262M [00:14<00:41, 4.73MB/s]
downloading ml-25m.zip: 26%|██▌ | 67.8M/262M [00:14<00:48, 4.00MB/s]
downloading ml-25m.zip: 26%|██▌ | 68.2M/262M [00:14<00:52, 3.72MB/s]
downloading ml-25m.zip: 26%|██▌ | 68.6M/262M [00:14<00:53, 3.61MB/s]
downloading ml-25m.zip: 26%|██▋ | 69.0M/262M [00:14<00:54, 3.53MB/s]
downloading ml-25m.zip: 26%|██▋ | 69.3M/262M [00:14<00:55, 3.49MB/s]
downloading ml-25m.zip: 27%|██▋ | 69.7M/262M [00:14<00:54, 3.52MB/s]
downloading ml-25m.zip: 27%|██▋ | 70.1M/262M [00:14<00:54, 3.50MB/s]
downloading ml-25m.zip: 27%|██▋ | 70.4M/262M [00:15<00:53, 3.60MB/s]
downloading ml-25m.zip: 27%|██▋ | 70.8M/262M [00:15<00:53, 3.59MB/s]
downloading ml-25m.zip: 27%|██▋ | 71.2M/262M [00:15<00:53, 3.54MB/s]
downloading ml-25m.zip: 27%|██▋ | 71.6M/262M [00:15<00:52, 3.62MB/s]
downloading ml-25m.zip: 27%|██▋ | 72.0M/262M [00:15<00:52, 3.61MB/s]
downloading ml-25m.zip: 28%|██▊ | 72.3M/262M [00:15<00:52, 3.62MB/s]
downloading ml-25m.zip: 28%|██▊ | 72.7M/262M [00:15<00:51, 3.66MB/s]
downloading ml-25m.zip: 28%|██▊ | 73.1M/262M [00:15<00:51, 3.66MB/s]
downloading ml-25m.zip: 28%|██▊ | 73.4M/262M [00:15<00:51, 3.67MB/s]
downloading ml-25m.zip: 28%|██▊ | 73.8M/262M [00:15<00:51, 3.66MB/s]
downloading ml-25m.zip: 28%|██▊ | 74.2M/262M [00:16<00:51, 3.66MB/s]
downloading ml-25m.zip: 28%|██▊ | 74.6M/262M [00:16<00:51, 3.65MB/s]
downloading ml-25m.zip: 29%|██▊ | 75.0M/262M [00:16<00:52, 3.58MB/s]
downloading ml-25m.zip: 29%|██▉ | 75.3M/262M [00:16<00:52, 3.58MB/s]
downloading ml-25m.zip: 29%|██▉ | 75.7M/262M [00:16<00:51, 3.59MB/s]
downloading ml-25m.zip: 29%|██▉ | 76.1M/262M [00:16<00:51, 3.60MB/s]
downloading ml-25m.zip: 29%|██▉ | 76.5M/262M [00:16<00:50, 3.66MB/s]
downloading ml-25m.zip: 29%|██▉ | 76.9M/262M [00:16<00:49, 3.77MB/s]
downloading ml-25m.zip: 29%|██▉ | 77.3M/262M [00:16<00:49, 3.71MB/s]
downloading ml-25m.zip: 30%|██▉ | 77.7M/262M [00:16<00:49, 3.70MB/s]
downloading ml-25m.zip: 30%|██▉ | 78.1M/262M [00:17<00:49, 3.73MB/s]
downloading ml-25m.zip: 30%|██▉ | 78.5M/262M [00:17<00:50, 3.65MB/s]
downloading ml-25m.zip: 30%|███ | 78.8M/262M [00:17<00:54, 3.39MB/s]
downloading ml-25m.zip: 30%|███ | 79.2M/262M [00:17<00:55, 3.29MB/s]
downloading ml-25m.zip: 30%|███ | 79.5M/262M [00:17<00:58, 3.10MB/s]
downloading ml-25m.zip: 30%|███ | 79.8M/262M [00:17<01:01, 2.96MB/s]
downloading ml-25m.zip: 31%|███ | 80.2M/262M [00:17<01:04, 2.81MB/s]
downloading ml-25m.zip: 31%|███ | 80.4M/262M [00:17<01:05, 2.76MB/s]
downloading ml-25m.zip: 31%|███ | 80.8M/262M [00:18<01:03, 2.86MB/s]
downloading ml-25m.zip: 31%|███ | 81.1M/262M [00:18<01:01, 2.92MB/s]
downloading ml-25m.zip: 31%|███ | 81.5M/262M [00:18<01:01, 2.93MB/s]
downloading ml-25m.zip: 31%|███ | 81.8M/262M [00:18<01:16, 2.36MB/s]
downloading ml-25m.zip: 31%|███▏ | 82.1M/262M [00:18<01:10, 2.55MB/s]
downloading ml-25m.zip: 31%|███▏ | 82.4M/262M [00:18<01:13, 2.44MB/s]
downloading ml-25m.zip: 32%|███▏ | 82.7M/262M [00:18<01:13, 2.45MB/s]
downloading ml-25m.zip: 32%|███▏ | 82.9M/262M [00:18<01:13, 2.44MB/s]
downloading ml-25m.zip: 32%|███▏ | 83.2M/262M [00:19<01:15, 2.36MB/s]
downloading ml-25m.zip: 32%|███▏ | 83.4M/262M [00:19<01:15, 2.36MB/s]
downloading ml-25m.zip: 32%|███▏ | 83.7M/262M [00:19<01:14, 2.40MB/s]
downloading ml-25m.zip: 32%|███▏ | 84.0M/262M [00:19<01:12, 2.46MB/s]
downloading ml-25m.zip: 32%|███▏ | 84.3M/262M [00:19<01:11, 2.49MB/s]
downloading ml-25m.zip: 32%|███▏ | 84.6M/262M [00:19<01:08, 2.57MB/s]
downloading ml-25m.zip: 32%|███▏ | 84.9M/262M [00:19<01:07, 2.62MB/s]
downloading ml-25m.zip: 33%|███▎ | 85.2M/262M [00:19<01:07, 2.64MB/s]
downloading ml-25m.zip: 33%|███▎ | 85.5M/262M [00:19<01:06, 2.65MB/s]
downloading ml-25m.zip: 33%|███▎ | 85.8M/262M [00:20<01:06, 2.65MB/s]
downloading ml-25m.zip: 33%|███▎ | 86.2M/262M [00:20<01:05, 2.69MB/s]
downloading ml-25m.zip: 33%|███▎ | 86.5M/262M [00:20<01:04, 2.70MB/s]
downloading ml-25m.zip: 33%|███▎ | 86.8M/262M [00:20<01:02, 2.79MB/s]
downloading ml-25m.zip: 33%|███▎ | 87.1M/262M [00:20<01:03, 2.75MB/s]
downloading ml-25m.zip: 33%|███▎ | 87.3M/262M [00:20<01:03, 2.74MB/s]
downloading ml-25m.zip: 33%|███▎ | 87.6M/262M [00:20<01:04, 2.68MB/s]
downloading ml-25m.zip: 34%|███▎ | 87.9M/262M [00:20<01:05, 2.65MB/s]
downloading ml-25m.zip: 34%|███▎ | 88.2M/262M [00:20<01:02, 2.78MB/s]
downloading ml-25m.zip: 34%|███▍ | 88.5M/262M [00:21<01:00, 2.88MB/s]
downloading ml-25m.zip: 34%|███▍ | 88.8M/262M [00:21<01:01, 2.80MB/s]
downloading ml-25m.zip: 34%|███▍ | 89.1M/262M [00:21<01:01, 2.79MB/s]
downloading ml-25m.zip: 34%|███▍ | 89.4M/262M [00:21<01:04, 2.68MB/s]
downloading ml-25m.zip: 34%|███▍ | 89.7M/262M [00:21<01:02, 2.75MB/s]
downloading ml-25m.zip: 34%|███▍ | 90.0M/262M [00:21<00:59, 2.89MB/s]
downloading ml-25m.zip: 34%|███▍ | 90.3M/262M [00:21<01:00, 2.85MB/s]
downloading ml-25m.zip: 35%|███▍ | 90.6M/262M [00:21<01:00, 2.84MB/s]
downloading ml-25m.zip: 35%|███▍ | 90.9M/262M [00:21<01:00, 2.82MB/s]
downloading ml-25m.zip: 35%|███▍ | 91.3M/262M [00:21<00:59, 2.89MB/s]
downloading ml-25m.zip: 35%|███▍ | 91.6M/262M [00:22<00:58, 2.93MB/s]
downloading ml-25m.zip: 35%|███▌ | 92.0M/262M [00:22<00:57, 2.98MB/s]
downloading ml-25m.zip: 35%|███▌ | 92.3M/262M [00:22<00:55, 3.04MB/s]
downloading ml-25m.zip: 35%|███▌ | 92.7M/262M [00:22<00:55, 3.07MB/s]
downloading ml-25m.zip: 36%|███▌ | 93.1M/262M [00:22<00:54, 3.12MB/s]
downloading ml-25m.zip: 36%|███▌ | 93.4M/262M [00:22<00:53, 3.15MB/s]
downloading ml-25m.zip: 36%|███▌ | 93.8M/262M [00:22<00:53, 3.17MB/s]
downloading ml-25m.zip: 36%|███▌ | 94.2M/262M [00:22<00:52, 3.21MB/s]
downloading ml-25m.zip: 36%|███▌ | 94.5M/262M [00:23<00:51, 3.24MB/s]
downloading ml-25m.zip: 36%|███▌ | 94.9M/262M [00:23<00:51, 3.26MB/s]
downloading ml-25m.zip: 36%|███▋ | 95.3M/262M [00:23<00:50, 3.27MB/s]
downloading ml-25m.zip: 37%|███▋ | 95.7M/262M [00:23<00:50, 3.30MB/s]
downloading ml-25m.zip: 37%|███▋ | 96.1M/262M [00:23<00:50, 3.27MB/s]
downloading ml-25m.zip: 37%|███▋ | 96.4M/262M [00:23<00:48, 3.39MB/s]
downloading ml-25m.zip: 37%|███▋ | 96.8M/262M [00:23<00:48, 3.38MB/s]
downloading ml-25m.zip: 37%|███▋ | 97.1M/262M [00:23<00:49, 3.33MB/s]
downloading ml-25m.zip: 37%|███▋ | 97.5M/262M [00:23<00:49, 3.33MB/s]
downloading ml-25m.zip: 37%|███▋ | 97.8M/262M [00:24<00:49, 3.33MB/s]
downloading ml-25m.zip: 37%|███▋ | 98.2M/262M [00:24<00:48, 3.35MB/s]
downloading ml-25m.zip: 38%|███▊ | 98.6M/262M [00:24<00:48, 3.35MB/s]
downloading ml-25m.zip: 38%|███▊ | 99.0M/262M [00:24<00:47, 3.46MB/s]
downloading ml-25m.zip: 38%|███▊ | 99.4M/262M [00:24<00:47, 3.45MB/s]
downloading ml-25m.zip: 38%|███▊ | 99.7M/262M [00:24<00:49, 3.28MB/s]
downloading ml-25m.zip: 38%|███▊ | 100M/262M [00:24<00:49, 3.28MB/s]
downloading ml-25m.zip: 38%|███▊ | 100M/262M [00:24<00:49, 3.27MB/s]
downloading ml-25m.zip: 38%|███▊ | 101M/262M [00:24<00:51, 3.13MB/s]
downloading ml-25m.zip: 39%|███▊ | 101M/262M [00:24<00:47, 3.38MB/s]
downloading ml-25m.zip: 39%|███▊ | 102M/262M [00:25<00:48, 3.32MB/s]
downloading ml-25m.zip: 39%|███▉ | 102M/262M [00:25<00:46, 3.45MB/s]
downloading ml-25m.zip: 39%|███▉ | 102M/262M [00:25<00:47, 3.36MB/s]
downloading ml-25m.zip: 39%|███▉ | 103M/262M [00:25<00:47, 3.39MB/s]
downloading ml-25m.zip: 39%|███▉ | 103M/262M [00:25<00:46, 3.44MB/s]
downloading ml-25m.zip: 39%|███▉ | 103M/262M [00:25<00:49, 3.21MB/s]
downloading ml-25m.zip: 40%|███▉ | 104M/262M [00:25<00:47, 3.30MB/s]
downloading ml-25m.zip: 40%|███▉ | 104M/262M [00:25<00:45, 3.49MB/s]
downloading ml-25m.zip: 40%|███▉ | 104M/262M [00:25<00:45, 3.46MB/s]
downloading ml-25m.zip: 40%|████ | 105M/262M [00:26<00:45, 3.45MB/s]
downloading ml-25m.zip: 40%|████ | 105M/262M [00:26<00:44, 3.54MB/s]
downloading ml-25m.zip: 40%|████ | 106M/262M [00:26<00:46, 3.39MB/s]
downloading ml-25m.zip: 40%|████ | 106M/262M [00:26<00:44, 3.54MB/s]
downloading ml-25m.zip: 41%|████ | 106M/262M [00:26<00:43, 3.61MB/s]
downloading ml-25m.zip: 41%|████ | 107M/262M [00:26<00:41, 3.75MB/s]
downloading ml-25m.zip: 41%|████ | 107M/262M [00:26<00:43, 3.60MB/s]
downloading ml-25m.zip: 41%|████ | 108M/262M [00:26<00:41, 3.74MB/s]
downloading ml-25m.zip: 41%|████▏ | 108M/262M [00:26<00:39, 3.90MB/s]
downloading ml-25m.zip: 41%|████▏ | 108M/262M [00:27<00:39, 3.90MB/s]
downloading ml-25m.zip: 42%|████▏ | 109M/262M [00:27<00:40, 3.80MB/s]
downloading ml-25m.zip: 42%|████▏ | 109M/262M [00:27<00:39, 3.90MB/s]
downloading ml-25m.zip: 42%|████▏ | 110M/262M [00:27<00:37, 4.10MB/s]
downloading ml-25m.zip: 42%|████▏ | 110M/262M [00:27<00:36, 4.18MB/s]
downloading ml-25m.zip: 42%|████▏ | 111M/262M [00:27<00:36, 4.11MB/s]
downloading ml-25m.zip: 42%|████▏ | 111M/262M [00:27<00:37, 4.04MB/s]
downloading ml-25m.zip: 43%|████▎ | 112M/262M [00:27<00:36, 4.12MB/s]
downloading ml-25m.zip: 43%|████▎ | 112M/262M [00:27<00:34, 4.30MB/s]
downloading ml-25m.zip: 43%|████▎ | 113M/262M [00:28<00:33, 4.41MB/s]
downloading ml-25m.zip: 43%|████▎ | 113M/262M [00:28<00:32, 4.56MB/s]
downloading ml-25m.zip: 43%|████▎ | 114M/262M [00:28<00:31, 4.66MB/s]
downloading ml-25m.zip: 44%|████▎ | 114M/262M [00:28<00:30, 4.82MB/s]
downloading ml-25m.zip: 44%|████▍ | 115M/262M [00:28<00:29, 4.91MB/s]
downloading ml-25m.zip: 44%|████▍ | 115M/262M [00:28<00:29, 5.03MB/s]
downloading ml-25m.zip: 44%|████▍ | 116M/262M [00:28<00:28, 5.16MB/s]
downloading ml-25m.zip: 45%|████▍ | 117M/262M [00:28<00:27, 5.29MB/s]
downloading ml-25m.zip: 45%|████▍ | 117M/262M [00:28<00:26, 5.44MB/s]
downloading ml-25m.zip: 45%|████▌ | 118M/262M [00:29<00:25, 5.59MB/s]
downloading ml-25m.zip: 45%|████▌ | 119M/262M [00:29<00:25, 5.72MB/s]
downloading ml-25m.zip: 46%|████▌ | 119M/262M [00:29<00:24, 5.86MB/s]
downloading ml-25m.zip: 46%|████▌ | 120M/262M [00:29<00:23, 6.04MB/s]
downloading ml-25m.zip: 46%|████▌ | 121M/262M [00:29<00:22, 6.20MB/s]
downloading ml-25m.zip: 46%|████▋ | 122M/262M [00:29<00:22, 6.29MB/s]
downloading ml-25m.zip: 47%|████▋ | 122M/262M [00:29<00:22, 6.26MB/s]
downloading ml-25m.zip: 47%|████▋ | 123M/262M [00:29<00:21, 6.48MB/s]
downloading ml-25m.zip: 47%|████▋ | 124M/262M [00:29<00:20, 6.84MB/s]
downloading ml-25m.zip: 48%|████▊ | 125M/262M [00:30<00:19, 7.17MB/s]
downloading ml-25m.zip: 48%|████▊ | 125M/262M [00:30<00:19, 7.18MB/s]
downloading ml-25m.zip: 48%|████▊ | 126M/262M [00:30<00:19, 7.10MB/s]
downloading ml-25m.zip: 48%|████▊ | 127M/262M [00:30<00:17, 7.56MB/s]
downloading ml-25m.zip: 49%|████▉ | 128M/262M [00:30<00:16, 7.96MB/s]
downloading ml-25m.zip: 49%|████▉ | 129M/262M [00:30<00:16, 7.95MB/s]
downloading ml-25m.zip: 49%|████▉ | 129M/262M [00:30<00:16, 7.88MB/s]
downloading ml-25m.zip: 50%|████▉ | 130M/262M [00:30<00:16, 8.09MB/s]
downloading ml-25m.zip: 50%|█████ | 131M/262M [00:30<00:15, 8.39MB/s]
downloading ml-25m.zip: 51%|█████ | 133M/262M [00:30<00:14, 8.93MB/s]
downloading ml-25m.zip: 51%|█████ | 134M/262M [00:31<00:13, 9.22MB/s]
downloading ml-25m.zip: 51%|█████▏ | 135M/262M [00:31<00:13, 9.34MB/s]
downloading ml-25m.zip: 52%|█████▏ | 135M/262M [00:31<00:13, 9.31MB/s]
downloading ml-25m.zip: 52%|█████▏ | 137M/262M [00:31<00:13, 9.64MB/s]
downloading ml-25m.zip: 53%|█████▎ | 138M/262M [00:31<00:12, 10.2MB/s]
downloading ml-25m.zip: 53%|█████▎ | 139M/262M [00:31<00:11, 10.4MB/s]
downloading ml-25m.zip: 53%|█████▎ | 140M/262M [00:31<00:11, 10.4MB/s]
downloading ml-25m.zip: 54%|█████▍ | 141M/262M [00:31<00:11, 10.5MB/s]
downloading ml-25m.zip: 54%|█████▍ | 142M/262M [00:31<00:10, 11.2MB/s]
downloading ml-25m.zip: 55%|█████▍ | 143M/262M [00:31<00:10, 11.1MB/s]
downloading ml-25m.zip: 55%|█████▌ | 145M/262M [00:32<00:10, 11.4MB/s]
downloading ml-25m.zip: 56%|█████▌ | 146M/262M [00:32<00:09, 11.7MB/s]
downloading ml-25m.zip: 56%|█████▌ | 147M/262M [00:32<00:09, 11.9MB/s]
downloading ml-25m.zip: 57%|█████▋ | 148M/262M [00:32<00:09, 11.9MB/s]
downloading ml-25m.zip: 57%|█████▋ | 150M/262M [00:32<00:09, 12.4MB/s]
downloading ml-25m.zip: 58%|█████▊ | 151M/262M [00:32<00:08, 12.5MB/s]
downloading ml-25m.zip: 58%|█████▊ | 152M/262M [00:32<00:08, 13.1MB/s]
downloading ml-25m.zip: 59%|█████▊ | 154M/262M [00:32<00:08, 13.3MB/s]
downloading ml-25m.zip: 59%|█████▉ | 155M/262M [00:32<00:08, 12.9MB/s]
downloading ml-25m.zip: 60%|█████▉ | 156M/262M [00:32<00:08, 13.0MB/s]
downloading ml-25m.zip: 60%|██████ | 158M/262M [00:33<00:07, 13.6MB/s]
downloading ml-25m.zip: 61%|██████ | 160M/262M [00:33<00:07, 14.2MB/s]
downloading ml-25m.zip: 62%|██████▏ | 161M/262M [00:33<00:06, 14.5MB/s]
downloading ml-25m.zip: 62%|██████▏ | 163M/262M [00:33<00:06, 15.1MB/s]
downloading ml-25m.zip: 63%|██████▎ | 165M/262M [00:33<00:06, 15.5MB/s]
downloading ml-25m.zip: 63%|██████▎ | 166M/262M [00:33<00:05, 16.3MB/s]
downloading ml-25m.zip: 64%|██████▍ | 168M/262M [00:33<00:05, 16.6MB/s]
downloading ml-25m.zip: 65%|██████▍ | 170M/262M [00:33<00:05, 16.5MB/s]
downloading ml-25m.zip: 65%|██████▌ | 171M/262M [00:33<00:05, 16.2MB/s]
downloading ml-25m.zip: 66%|██████▌ | 173M/262M [00:34<00:05, 16.8MB/s]
downloading ml-25m.zip: 67%|██████▋ | 175M/262M [00:34<00:04, 17.6MB/s]
downloading ml-25m.zip: 68%|██████▊ | 177M/262M [00:34<00:04, 18.0MB/s]
downloading ml-25m.zip: 68%|██████▊ | 179M/262M [00:34<00:04, 18.0MB/s]
downloading ml-25m.zip: 69%|██████▉ | 181M/262M [00:34<00:04, 18.1MB/s]
downloading ml-25m.zip: 70%|██████▉ | 183M/262M [00:34<00:04, 18.3MB/s]
downloading ml-25m.zip: 71%|███████ | 185M/262M [00:34<00:04, 19.2MB/s]
downloading ml-25m.zip: 71%|███████▏ | 187M/262M [00:34<00:03, 19.7MB/s]
downloading ml-25m.zip: 72%|███████▏ | 189M/262M [00:34<00:03, 19.9MB/s]
downloading ml-25m.zip: 73%|███████▎ | 191M/262M [00:34<00:04, 17.2MB/s]
downloading ml-25m.zip: 74%|███████▎ | 193M/262M [00:35<00:04, 14.8MB/s]
downloading ml-25m.zip: 74%|███████▍ | 194M/262M [00:35<00:05, 12.7MB/s]
downloading ml-25m.zip: 75%|███████▍ | 196M/262M [00:35<00:05, 11.9MB/s]
downloading ml-25m.zip: 75%|███████▌ | 197M/262M [00:35<00:05, 11.5MB/s]
downloading ml-25m.zip: 76%|███████▌ | 198M/262M [00:35<00:05, 11.0MB/s]
downloading ml-25m.zip: 76%|███████▌ | 199M/262M [00:35<00:05, 10.7MB/s]
downloading ml-25m.zip: 76%|███████▋ | 200M/262M [00:35<00:05, 10.7MB/s]
downloading ml-25m.zip: 77%|███████▋ | 202M/262M [00:36<00:05, 11.1MB/s]
downloading ml-25m.zip: 77%|███████▋ | 203M/262M [00:36<00:05, 11.1MB/s]
downloading ml-25m.zip: 78%|███████▊ | 204M/262M [00:36<00:05, 11.0MB/s]
downloading ml-25m.zip: 78%|███████▊ | 205M/262M [00:36<00:05, 11.2MB/s]
downloading ml-25m.zip: 79%|███████▊ | 206M/262M [00:36<00:05, 10.9MB/s]
downloading ml-25m.zip: 79%|███████▉ | 207M/262M [00:36<00:04, 11.2MB/s]
downloading ml-25m.zip: 80%|███████▉ | 209M/262M [00:36<00:04, 11.3MB/s]
downloading ml-25m.zip: 80%|████████ | 210M/262M [00:36<00:04, 11.8MB/s]
downloading ml-25m.zip: 81%|████████ | 211M/262M [00:36<00:04, 12.0MB/s]
downloading ml-25m.zip: 81%|████████ | 212M/262M [00:36<00:04, 11.9MB/s]
downloading ml-25m.zip: 82%|████████▏ | 214M/262M [00:37<00:04, 11.9MB/s]
downloading ml-25m.zip: 82%|████████▏ | 215M/262M [00:37<00:03, 12.3MB/s]
downloading ml-25m.zip: 82%|████████▏ | 216M/262M [00:37<00:03, 12.2MB/s]
downloading ml-25m.zip: 83%|████████▎ | 217M/262M [00:37<00:03, 12.2MB/s]
downloading ml-25m.zip: 83%|████████▎ | 219M/262M [00:37<00:03, 12.4MB/s]
downloading ml-25m.zip: 84%|████████▍ | 220M/262M [00:37<00:03, 11.8MB/s]
downloading ml-25m.zip: 84%|████████▍ | 221M/262M [00:37<00:03, 11.3MB/s]
downloading ml-25m.zip: 85%|████████▍ | 222M/262M [00:37<00:03, 11.0MB/s]
downloading ml-25m.zip: 85%|████████▌ | 223M/262M [00:37<00:03, 10.8MB/s]
downloading ml-25m.zip: 86%|████████▌ | 224M/262M [00:38<00:03, 10.7MB/s]
downloading ml-25m.zip: 86%|████████▌ | 226M/262M [00:38<00:03, 10.3MB/s]
downloading ml-25m.zip: 86%|████████▋ | 227M/262M [00:38<00:03, 10.2MB/s]
downloading ml-25m.zip: 87%|████████▋ | 228M/262M [00:38<00:03, 10.2MB/s]
downloading ml-25m.zip: 87%|████████▋ | 229M/262M [00:38<00:03, 10.2MB/s]
downloading ml-25m.zip: 88%|████████▊ | 230M/262M [00:38<00:03, 8.90MB/s]
downloading ml-25m.zip: 88%|████████▊ | 231M/262M [00:38<00:03, 9.01MB/s]
downloading ml-25m.zip: 88%|████████▊ | 231M/262M [00:38<00:03, 8.96MB/s]
downloading ml-25m.zip: 89%|████████▊ | 232M/262M [00:38<00:03, 8.68MB/s]
downloading ml-25m.zip: 89%|████████▉ | 233M/262M [00:39<00:03, 8.95MB/s]
downloading ml-25m.zip: 89%|████████▉ | 234M/262M [00:39<00:03, 9.01MB/s]
downloading ml-25m.zip: 90%|████████▉ | 235M/262M [00:39<00:02, 9.27MB/s]
downloading ml-25m.zip: 90%|█████████ | 236M/262M [00:39<00:02, 9.68MB/s]
downloading ml-25m.zip: 91%|█████████ | 238M/262M [00:39<00:02, 10.2MB/s]
downloading ml-25m.zip: 91%|█████████ | 239M/262M [00:39<00:02, 10.0MB/s]
downloading ml-25m.zip: 91%|█████████▏| 240M/262M [00:39<00:02, 10.0MB/s]
downloading ml-25m.zip: 92%|█████████▏| 241M/262M [00:39<00:01, 10.6MB/s]
downloading ml-25m.zip: 92%|█████████▏| 242M/262M [00:39<00:01, 10.9MB/s]
downloading ml-25m.zip: 93%|█████████▎| 243M/262M [00:39<00:01, 10.9MB/s]
downloading ml-25m.zip: 93%|█████████▎| 244M/262M [00:40<00:01, 10.8MB/s]
downloading ml-25m.zip: 94%|█████████▎| 245M/262M [00:40<00:01, 10.8MB/s]
downloading ml-25m.zip: 94%|█████████▍| 246M/262M [00:40<00:01, 9.76MB/s]
downloading ml-25m.zip: 94%|█████████▍| 247M/262M [00:40<00:01, 9.20MB/s]
downloading ml-25m.zip: 95%|█████████▍| 248M/262M [00:40<00:01, 8.93MB/s]
downloading ml-25m.zip: 95%|█████████▌| 249M/262M [00:40<00:01, 8.81MB/s]
downloading ml-25m.zip: 96%|█████████▌| 250M/262M [00:40<00:01, 8.66MB/s]
downloading ml-25m.zip: 96%|█████████▌| 251M/262M [00:40<00:01, 8.52MB/s]
downloading ml-25m.zip: 96%|█████████▌| 252M/262M [00:40<00:01, 8.49MB/s]
downloading ml-25m.zip: 97%|█████████▋| 253M/262M [00:41<00:01, 8.64MB/s]
downloading ml-25m.zip: 97%|█████████▋| 254M/262M [00:41<00:00, 8.71MB/s]
downloading ml-25m.zip: 97%|█████████▋| 255M/262M [00:41<00:00, 8.65MB/s]
downloading ml-25m.zip: 98%|█████████▊| 256M/262M [00:41<00:00, 8.21MB/s]
downloading ml-25m.zip: 98%|█████████▊| 256M/262M [00:41<00:00, 8.23MB/s]
downloading ml-25m.zip: 98%|█████████▊| 257M/262M [00:41<00:00, 8.04MB/s]
downloading ml-25m.zip: 98%|█████████▊| 258M/262M [00:41<00:00, 7.17MB/s]
downloading ml-25m.zip: 99%|█████████▉| 259M/262M [00:41<00:00, 6.46MB/s]
downloading ml-25m.zip: 99%|█████████▉| 259M/262M [00:41<00:00, 6.29MB/s]
downloading ml-25m.zip: 99%|█████████▉| 260M/262M [00:42<00:00, 6.31MB/s]
downloading ml-25m.zip: 100%|█████████▉| 261M/262M [00:42<00:00, 6.35MB/s]
downloading ml-25m.zip: 100%|█████████▉| 261M/262M [00:42<00:00, 6.45MB/s]
downloading ml-25m.zip: 262MB [00:42, 6.18MB/s]
unzipping files: 0%| | 0/8 [00:00<?, ?files/s]
unzipping files: 25%|██▌ | 2/8 [00:00<00:00, 10.25files/s]
unzipping files: 62%|██████▎ | 5/8 [00:03<00:02, 1.34files/s]
unzipping files: 88%|████████▊ | 7/8 [00:04<00:00, 1.30files/s]
unzipping files: 100%|██████████| 8/8 [00:04<00:00, 1.61files/s]
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py: 4 warnings
tests/unit/systems/test_export.py: 1 warning
tests/unit/systems/test_inference_ops.py: 2 warnings
tests/unit/systems/test_op_runner.py: 4 warnings
tests/unit/systems/hugectr/test_hugectr.py: 1 warning
/usr/local/lib/python3.8/dist-packages/cudf/core/dataframe.py:1292: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/unit/systems/hugectr/test_hugectr.py::test_training - AttributeE...
======= 1 failed, 17 passed, 2 skipped, 16 warnings in 124.49s (0:02:04) =======
Build step 'Execute shell' marked build as failure
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins12197240355916266001.sh
Click to view CI Results
GitHub pull request #125 of commit 88883cb15df0f5ae6dd0210c587bc5078dc78580, no merge conflicts.
Running as SYSTEM
Setting status of 88883cb15df0f5ae6dd0210c587bc5078dc78580 to PENDING with url https://10.20.13.93:8080/job/merlin_systems/116/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 88883cb15df0f5ae6dd0210c587bc5078dc78580^{commit} # timeout=10
Checking out Revision 88883cb15df0f5ae6dd0210c587bc5078dc78580 (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 88883cb15df0f5ae6dd0210c587bc5078dc78580 # timeout=10
Commit message: "hugectr op is green for single hot columns"
> git rev-list --no-walk c06e88c5289b7ee494c9251d032a0a1aff95944f # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins5826133664549193553.sh
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 44 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py ... [ 9%]
tests/unit/systems/test_ensemble_ops.py .. [ 13%]
tests/unit/systems/test_export.py . [ 15%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 31%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 38%]
tests/unit/systems/fil/test_fil.py ....................... [ 90%]
tests/unit/systems/fil/test_forest.py ... [ 97%]
tests/unit/systems/hugectr/test_hugectr.py . [100%]
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
================== 44 passed, 13 warnings in 78.85s (0:01:18) ==================
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins3348732409581959692.sh
Click to view CI Results
GitHub pull request #125 of commit c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d, no merge conflicts.
Running as SYSTEM
Setting status of c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d to PENDING with url https://10.20.13.93:8080/job/merlin_systems/117/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d^{commit} # timeout=10
Checking out Revision c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d # timeout=10
Commit message: "Merge branch 'main' of https://github.com/NVIDIA-Merlin/systems into add-hugectr-op"
> git rev-list --no-walk 88883cb15df0f5ae6dd0210c587bc5078dc78580 # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins4541704241858875195.sh
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 48 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py .... [ 10%]
tests/unit/systems/test_ensemble_ops.py .. [ 14%]
tests/unit/systems/test_export.py . [ 16%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 31%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 37%]
tests/unit/systems/fil/test_fil.py .......................... [ 91%]
tests/unit/systems/fil/test_forest.py ... [ 97%]
tests/unit/systems/hugectr/test_hugectr.py . [100%]
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl]
/usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute n_features_ was deprecated in version 1.0 and will be removed in 1.2. Use n_features_in_ instead.
warnings.warn(msg, category=FutureWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
================== 48 passed, 18 warnings in 92.52s (0:01:32) ==================
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins14795559711341224238.sh
Click to view CI Results
GitHub pull request #125 of commit 7be0df83b88908a01c093ff5d1a77979daeaee8c, no merge conflicts.
Running as SYSTEM
Setting status of 7be0df83b88908a01c093ff5d1a77979daeaee8c to PENDING with url https://10.20.13.93:8080/job/merlin_systems/118/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 7be0df83b88908a01c093ff5d1a77979daeaee8c^{commit} # timeout=10
Checking out Revision 7be0df83b88908a01c093ff5d1a77979daeaee8c (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 7be0df83b88908a01c093ff5d1a77979daeaee8c # timeout=10
Commit message: "add skip for module and add init"
> git rev-list --no-walk c379fa2fea8d862ddef4478c02ee3ff0c4dcb93d # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins10365529385200628683.sh
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 48 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py .... [ 10%]
tests/unit/systems/test_ensemble_ops.py .. [ 14%]
tests/unit/systems/test_export.py . [ 16%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 31%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 37%]
tests/unit/systems/fil/test_fil.py .......................... [ 91%]
tests/unit/systems/fil/test_forest.py ... [ 97%]
tests/unit/systems/hugectr/test_hugectr.py . [100%]
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl]
/usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute n_features_ was deprecated in version 1.0 and will be removed in 1.2. Use n_features_in_ instead.
warnings.warn(msg, category=FutureWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
================= 48 passed, 18 warnings in 418.29s (0:06:58) ==================
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins10001184135030396013.sh
Click to view CI Results
GitHub pull request #125 of commit 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb, no merge conflicts.
Running as SYSTEM
Setting status of 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb to PENDING with url https://10.20.13.93:8080/job/merlin_systems/119/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb^{commit} # timeout=10
Checking out Revision 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb # timeout=10
Commit message: "remove common folder in tests and remove unneeded lines in test hugectr"
> git rev-list --no-walk 7be0df83b88908a01c093ff5d1a77979daeaee8c # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins8677967745791111808.sh
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 48 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py .... [ 10%]
tests/unit/systems/test_ensemble_ops.py .. [ 14%]
tests/unit/systems/test_export.py . [ 16%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 31%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 37%]
tests/unit/systems/fil/test_fil.py .......................... [ 91%]
tests/unit/systems/fil/test_forest.py ... [ 97%]
tests/unit/systems/hugectr/test_hugectr.py . [100%]
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/__init__.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl]
/usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute n_features_ was deprecated in version 1.0 and will be removed in 1.2. Use n_features_in_ instead.
warnings.warn(msg, category=FutureWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
================= 48 passed, 18 warnings in 316.87s (0:05:16) ==================
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins17829727239229261377.sh
Click to view CI Results
GitHub pull request #125 of commit 80521b272bf84315d24b2f8fb94a28011e4aedf3, no merge conflicts.
Running as SYSTEM
Setting status of 80521b272bf84315d24b2f8fb94a28011e4aedf3 to PENDING with url https://10.20.13.93:8080/job/merlin_systems/120/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 80521b272bf84315d24b2f8fb94a28011e4aedf3^{commit} # timeout=10
Checking out Revision 80521b272bf84315d24b2f8fb94a28011e4aedf3 (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 80521b272bf84315d24b2f8fb94a28011e4aedf3 # timeout=10
Commit message: "got hugectr wrapper op PredictHugeCTR working correctly"
> git rev-list --no-walk 1bbda7b9aedf11d2bc56b4542a26f7a3db8872fb # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins1443726222047821983.sh
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 49 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py .... [ 10%]
tests/unit/systems/test_ensemble_ops.py .. [ 14%]
tests/unit/systems/test_export.py . [ 16%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 30%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%]
tests/unit/systems/fil/test_fil.py .......................... [ 89%]
tests/unit/systems/fil/test_forest.py F.. [ 95%]
tests/unit/systems/hugectr/test_hugectr.py sF [100%]
=================================== FAILURES ===================================
____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-7/test_load_from_config0')
def test_load_from_config(tmpdir):
rows = 200
num_features = 16
X, y = sklearn.datasets.make_regression(
n_samples=rows,
n_features=num_features,
n_informative=num_features // 3,
random_state=0,
)
model = xgboost.XGBRegressor()
model.fit(X, y)
feature_names = [str(i) for i in range(num_features)]
input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names])
output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)])
config = PredictForest(model, input_schema).export(
tmpdir, input_schema, output_schema, node_id=2
)
node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False}
}
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}}
E Differing items:
E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}}
E Full diff:
E {
E 'output__0': {'dtype': 'float32',
E 'is_list': False,
E - 'is_ragged': False},
E ? -
E + 'is_ragged': False,
E + 'tags': []},
E }
tests/unit/systems/fil/test_forest.py:57: AssertionError
_____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0')
def test_predict_hugectr(tmpdir):
cat_dtypes = {"a": int, "b": int, "c": int}
categorical_columns = ["a", "b", "c"]
gdf = make_df(
{
"a": np.arange(64, dtype=np.int64),
"b": np.arange(64, dtype=np.int64),
"c": np.arange(64, dtype=np.int64),
"d": np.random.rand(64).tolist(),
"label": [0] * 64,
},
)
gdf["label"] = gdf["label"].astype("float32")
gdf["d"] = gdf["d"].astype("float32")
train_dataset = nvt.Dataset(gdf)
dense_columns = ["d"]
dict_dtypes = {}
col_schemas = train_dataset.schema.column_schemas
for col in dense_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS)
dict_dtypes[col] = np.float32
for col in categorical_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL)
dict_dtypes[col] = np.int64
for col in ["label"]:
col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET)
dict_dtypes[col] = np.float32
train_path = os.path.join(tmpdir, "train/")
os.mkdir(train_path)
train_dataset.to_parquet(
output_path=train_path,
shuffle=nvt.io.Shuffle.PER_PARTITION,
cats=categorical_columns,
conts=dense_columns,
labels=["label"],
dtypes=dict_dtypes,
)
embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)}
total_cardinality = 0
slot_sizes = []
for column in cat_dtypes:
slot_sizes.append(embeddings[column][0])
total_cardinality += embeddings[column][0]
# slot sizes = list of cardinalities per column, total is sum of individual
model = _run_model(slot_sizes, train_path, len(dense_columns))
model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0])
model_repository_path = os.path.join(tmpdir, "model_repository")
input_schema = train_dataset.schema
triton_chain = input_schema.column_names >> model_op
ens = Ensemble(triton_chain, input_schema)
os.makedirs(model_repository_path)
enc_config, node_configs = ens.export(model_repository_path)
assert enc_config
assert len(node_configs) == 1
assert node_configs[0].name == "0_predicthugectr"
df = train_dataset.to_ddf().compute()[:5]
dense, cats, rowptr = _convert(df, slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path,
["OUTPUT0"],
df,
"ensemble_model",
backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json",
)
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver
with run_triton_server(tmpdir, backend_config=backend_config) as client:
/usr/lib/python3.8/contextlib.py:113: in __enter__
return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository'
backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager
def run_triton_server(modelpath, backend_config="tensorflow,version=2"):
"""This function starts up a Triton server instance and returns a client to it.
Parameters
----------
modelpath : string
The path to the model to load.
Yields
------
client: tritonclient.InferenceServerClient
The client connected to the Triton server.
"""
cmdline = [
TRITON_SERVER_PATH,
"--model-repository",
modelpath,
f"--backend-config={backend_config}",
]
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = "0"
with subprocess.Popen(cmdline, env=env) as process:
try:
with grpcclient.InferenceServerClient("localhost:8001") as client:
# wait until server is ready
for _ in range(60):
if process.poll() is not None:
retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError
----------------------------- Captured stdout call -----------------------------
HugeCTR Version: 3.7
====================================================Model Init=====================================================
[HCTR][15:29:13.759][WARNING][RK0][main]: The model name is not specified when creating the solver.
[HCTR][15:29:13.759][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled.
[HCTR][15:29:13.759][INFO][RK0][main]: Global seed is 2631192487
[HCTR][15:29:13.801][INFO][RK0][main]: Device to NUMA mapping:
GPU 0 -> node 0
[HCTR][15:29:14.358][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled.
[HCTR][15:29:14.358][INFO][RK0][main]: Start all2all warmup
[HCTR][15:29:14.358][INFO][RK0][main]: End all2all warmup
[HCTR][15:29:14.358][INFO][RK0][main]: Using All-reduce algorithm: NCCL
[HCTR][15:29:14.359][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB
[HCTR][15:29:14.359][INFO][RK0][main]: num of DataReader workers: 1
[HCTR][15:29:14.359][INFO][RK0][main]: Vocabulary size: 0
[HCTR][15:29:14.359][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362
[HCTR][15:29:14.359][DEBUG][RK0][tid #140578230626048]: file_name_ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][15:29:14.360][DEBUG][RK0][tid #140578222233344]: file_name_ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][15:29:14.361][INFO][RK0][main]: Graph analysis to resolve tensor dependency
===================================================Model Compile===================================================
[HCTR][15:29:14.654][INFO][RK0][main]: gpu0 start to init embedding
[HCTR][15:29:14.654][INFO][RK0][main]: gpu0 init embedding done
[HCTR][15:29:14.655][INFO][RK0][main]: Starting AUC NCCL warm-up
[HCTR][15:29:14.656][INFO][RK0][main]: Warm-up done
===================================================Model Summary===================================================
[HCTR][15:29:14.656][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Layer Type Input Name Output Name Output Shape
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)
InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1
BinaryCrossEntropyLoss fc2 loss
label
=====================================================Model Fit=====================================================
[HCTR][15:29:14.656][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20
[HCTR][15:29:14.656][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10
[HCTR][15:29:14.656][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10
[HCTR][15:29:14.656][INFO][RK0][main]: Dense network trainable: True
[HCTR][15:29:14.656][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True
[HCTR][15:29:14.656][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True
[HCTR][15:29:14.656][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000
[HCTR][15:29:14.656][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000
[HCTR][15:29:14.656][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/file_list.txt
[HCTR][15:29:14.656][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/file_list.txt
[HCTR][15:29:14.661][DEBUG][RK0][tid #140578230626048]: file_name /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows 64
[HCTR][15:29:14.666][DEBUG][RK0][tid #140578230626048]: file_name_ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][15:29:14.670][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][15:29:14.671][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][15:29:14.695][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][15:29:14.731][INFO][RK0][main]: Done
[HCTR][15:29:14.750][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][15:29:14.787][INFO][RK0][main]: Done
[HCTR][15:29:14.789][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][15:29:14.789][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][15:29:14.789][INFO][RK0][main]: Dumping dense optimizer states to file, successful
[HCTR][15:29:14.794][DEBUG][RK0][tid #140578230626048]: file_name_ /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][15:29:14.796][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s.
[HCTR][15:29:14.798][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully
[HCTR][15:29:14.799][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][15:29:14.799][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][15:29:14.817][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][15:29:14.852][INFO][RK0][main]: Done
[HCTR][15:29:14.871][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][15:29:14.908][INFO][RK0][main]: Done
[HCTR][15:29:14.910][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][15:29:14.910][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][15:29:14.910][INFO][RK0][main]: Dumping dense optimizer states to file, successful
[HCTR][15:29:17.797][INFO][RK0][main]: default_emb_vec_value is not specified using default: 0
[HCTR][15:29:17.797][INFO][RK0][main]: Creating HashMap CPU database backend...
[HCTR][15:29:17.797][INFO][RK0][main]: Volatile DB: initial cache rate = 1
[HCTR][15:29:17.797][INFO][RK0][main]: Volatile DB: cache missed embeddings = 0
[HCTR][15:29:18.056][INFO][RK0][main]: Table: hps_et.0_hugectr.sparse_embedding1; cached 64 / 64 embeddings in volatile database (PreallocatedHashMapBackend); load: 64 / 18446744073709551615 (0.00%).
[HCTR][15:29:18.057][DEBUG][RK0][main]: Real-time subscribers created!
[HCTR][15:29:18.057][INFO][RK0][main]: Create embedding cache in device 0.
[HCTR][15:29:18.057][INFO][RK0][main]: Use GPU embedding cache: True, cache size percentage: 0.500000
[HCTR][15:29:18.057][INFO][RK0][main]: Configured cache hit rate threshold: 0.900000
[HCTR][15:29:18.057][INFO][RK0][main]: The size of thread pool: 16
[HCTR][15:29:18.057][INFO][RK0][main]: The size of worker memory pool: 4
[HCTR][15:29:18.057][INFO][RK0][main]: The size of refresh memory pool: 1
[HCTR][15:29:18.074][INFO][RK0][main]: Global seed is 1817110376
[HCTR][15:29:18.699][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled.
[HCTR][15:29:18.699][INFO][RK0][main]: Start all2all warmup
[HCTR][15:29:18.699][INFO][RK0][main]: End all2all warmup
[HCTR][15:29:18.700][INFO][RK0][main]: Create inference session on device: 0
[HCTR][15:29:18.700][INFO][RK0][main]: Model name: 0_hugectr
[HCTR][15:29:18.700][INFO][RK0][main]: Use mixed precision: False
[HCTR][15:29:18.700][INFO][RK0][main]: Use cuda graph: True
[HCTR][15:29:18.700][INFO][RK0][main]: Max batchsize: 64
[HCTR][15:29:18.700][INFO][RK0][main]: Use I64 input key: True
[HCTR][15:29:18.700][INFO][RK0][main]: start create embedding for inference
[HCTR][15:29:18.700][INFO][RK0][main]: sparse_input name data1
[HCTR][15:29:18.700][INFO][RK0][main]: create embedding for inference success
[HCTR][15:29:18.700][INFO][RK0][main]: Inference stage skip BinaryCrossEntropyLoss layer, replaced by Sigmoid layer
----------------------------- Captured stderr call -----------------------------
I0701 15:29:15.199327 7869 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f2414000000' with size 268435456
I0701 15:29:15.200112 7869 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864
I0701 15:29:15.203367 7869 model_repository_manager.cc:1191] loading: 0_predicthugectr:1
I0701 15:29:15.303720 7869 model_repository_manager.cc:1191] loading: 0_hugectr:1
I0701 15:29:15.311508 7869 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0)
0701 15:29:17.406730 7909 pb_stub.cc:301] Failed to initialize Python stub: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr'
At:
(973): _find_and_load_unlocked
(991): _find_and_load
(1014): _gcd_import
/usr/lib/python3.8/importlib/init.py(127): import_module
/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init
/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize
E0701 15:29:17.780691 7869 model_repository_manager.cc:1348] failed to load '0_predicthugectr' version 1: Internal: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr'
At:
(973): _find_and_load_unlocked
(991): _find_and_load
(1014): _gcd_import
/usr/lib/python3.8/importlib/init.py(127): import_module
/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init
/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize
I0701 15:29:17.796763 7869 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr
I0701 15:29:17.796790 7869 hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9
I0701 15:29:17.796800 7869 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.9
I0701 15:29:17.796807 7869 hugectr.cc:1772] The HugeCTR backend Repository location: /opt/tritonserver/backends/hugectr
I0701 15:29:17.796815 7869 hugectr.cc:1781] The HugeCTR backend configuration: {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}}
I0701 15:29:17.796839 7869 hugectr.cc:345] *****Parsing Parameter Server Configuration from /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json
I0701 15:29:17.796897 7869 hugectr.cc:366] Support 64-bit keys = 1
I0701 15:29:17.796932 7869 hugectr.cc:591] Model name = 0_hugectr
I0701 15:29:17.796941 7869 hugectr.cc:600] Model '0_hugectr' -> network file = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json
I0701 15:29:17.796949 7869 hugectr.cc:607] Model '0_hugectr' -> max. batch size = 64
I0701 15:29:17.796955 7869 hugectr.cc:613] Model '0_hugectr' -> dense model file = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/_dense_0.model
I0701 15:29:17.796965 7869 hugectr.cc:619] Model '0_hugectr' -> sparse model files = [/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_sparse_0.model]
I0701 15:29:17.796973 7869 hugectr.cc:630] Model '0_hugectr' -> use GPU embedding cache = 1
I0701 15:29:17.796990 7869 hugectr.cc:639] Model '0_hugectr' -> hit rate threshold = 0.9
I0701 15:29:17.796998 7869 hugectr.cc:647] Model '0_hugectr' -> per model GPU cache = 0.5
I0701 15:29:17.797015 7869 hugectr.cc:664] Model '0_hugectr' -> use_mixed_precision = 0
I0701 15:29:17.797024 7869 hugectr.cc:671] Model '0_hugectr' -> scaler = 1
I0701 15:29:17.797031 7869 hugectr.cc:677] Model '0_hugectr' -> use_algorithm_search = 1
I0701 15:29:17.797037 7869 hugectr.cc:685] Model '0_hugectr' -> use_cuda_graph = 1
I0701 15:29:17.797044 7869 hugectr.cc:692] Model '0_hugectr' -> num. pool worker buffers = 4
I0701 15:29:17.797051 7869 hugectr.cc:700] Model '0_hugectr' -> num. pool refresh buffers = 1
I0701 15:29:17.797081 7869 hugectr.cc:708] Model '0_hugectr' -> cache refresh rate per iteration = 0.2
I0701 15:29:17.797090 7869 hugectr.cc:717] Model '0_hugectr' -> deployed device list = [0]
I0701 15:29:17.797098 7869 hugectr.cc:725] Model '0_hugectr' -> default value for each table = [0]
I0701 15:29:17.797105 7869 hugectr.cc:733] Model '0_hugectr' -> maxnum_des_feature_per_sample = 1
I0701 15:29:17.797112 7869 hugectr.cc:741] Model '0_hugectr' -> refresh_delay = 0
I0701 15:29:17.797118 7869 hugectr.cc:747] Model '0_hugectr' -> refresh_interval = 0
I0701 15:29:17.797126 7869 hugectr.cc:755] Model '0_hugectr' -> maxnum_catfeature_query_per_table_per_sample list = [3]
I0701 15:29:17.797133 7869 hugectr.cc:766] Model '0_hugectr' -> embedding_vecsize_per_table list = [16]
I0701 15:29:17.797142 7869 hugectr.cc:773] Model '0_hugectr' -> embedding model names = [, sparse_embedding1]
I0701 15:29:17.797148 7869 hugectr.cc:780] Model '0_hugectr' -> label_dim = 1
I0701 15:29:17.797154 7869 hugectr.cc:785] Model '0_hugectr' -> the number of slots = 3
I0701 15:29:17.797167 7869 hugectr.cc:806] *****The HugeCTR Backend Parameter Server is creating... *****
I0701 15:29:17.797315 7869 hugectr.cc:814] ***** Parameter Server(Int64) is creating... *****
I0701 15:29:18.062446 7869 hugectr.cc:825] *****The HugeCTR Backend Backend created the Parameter Server successfully! *****
I0701 15:29:18.062495 7869 hugectr.cc:1844] TRITONBACKEND_ModelInitialize: 0_hugectr (version 1)
I0701 15:29:18.062503 7869 hugectr.cc:1857] Repository location: /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr
I0701 15:29:18.062510 7869 hugectr.cc:1872] backend configuration in mode: {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}}
I0701 15:29:18.062521 7869 hugectr.cc:1888] Parsing the latest Parameter Server json config file for deploying model 0_hugectr online
I0701 15:29:18.062528 7869 hugectr.cc:1893] Hierarchical PS version is 0 and the current Model Version is 1
I0701 15:29:18.062534 7869 hugectr.cc:345] *****Parsing Parameter Server Configuration from /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json
I0701 15:29:18.062576 7869 hugectr.cc:366] Support 64-bit keys = 1
I0701 15:29:18.062597 7869 hugectr.cc:591] Model name = 0_hugectr
I0701 15:29:18.062606 7869 hugectr.cc:600] Model '0_hugectr' -> network file = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json
I0701 15:29:18.062613 7869 hugectr.cc:607] Model '0_hugectr' -> max. batch size = 64
I0701 15:29:18.062619 7869 hugectr.cc:613] Model '0_hugectr' -> dense model file = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/_dense_0.model
I0701 15:29:18.062628 7869 hugectr.cc:619] Model '0_hugectr' -> sparse model files = [/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_sparse_0.model]
I0701 15:29:18.062635 7869 hugectr.cc:630] Model '0_hugectr' -> use GPU embedding cache = 1
I0701 15:29:18.062645 7869 hugectr.cc:639] Model '0_hugectr' -> hit rate threshold = 0.9
I0701 15:29:18.062653 7869 hugectr.cc:647] Model '0_hugectr' -> per model GPU cache = 0.5
I0701 15:29:18.062666 7869 hugectr.cc:664] Model '0_hugectr' -> use_mixed_precision = 0
I0701 15:29:18.062674 7869 hugectr.cc:671] Model '0_hugectr' -> scaler = 1
I0701 15:29:18.062680 7869 hugectr.cc:677] Model '0_hugectr' -> use_algorithm_search = 1
I0701 15:29:18.062686 7869 hugectr.cc:685] Model '0_hugectr' -> use_cuda_graph = 1
I0701 15:29:18.062693 7869 hugectr.cc:692] Model '0_hugectr' -> num. pool worker buffers = 4
I0701 15:29:18.062700 7869 hugectr.cc:700] Model '0_hugectr' -> num. pool refresh buffers = 1
I0701 15:29:18.062707 7869 hugectr.cc:708] Model '0_hugectr' -> cache refresh rate per iteration = 0.2
I0701 15:29:18.062714 7869 hugectr.cc:717] Model '0_hugectr' -> deployed device list = [0]
I0701 15:29:18.062746 7869 hugectr.cc:725] Model '0_hugectr' -> default value for each table = [0]
I0701 15:29:18.062753 7869 hugectr.cc:733] Model '0_hugectr' -> maxnum_des_feature_per_sample = 1
I0701 15:29:18.062759 7869 hugectr.cc:741] Model '0_hugectr' -> refresh_delay = 0
I0701 15:29:18.062766 7869 hugectr.cc:747] Model '0_hugectr' -> refresh_interval = 0
I0701 15:29:18.062773 7869 hugectr.cc:755] Model '0_hugectr' -> maxnum_catfeature_query_per_table_per_sample list = [3]
I0701 15:29:18.062780 7869 hugectr.cc:766] Model '0_hugectr' -> embedding_vecsize_per_table list = [16]
I0701 15:29:18.062788 7869 hugectr.cc:773] Model '0_hugectr' -> embedding model names = [, sparse_embedding1]
I0701 15:29:18.062794 7869 hugectr.cc:780] Model '0_hugectr' -> label_dim = 1
I0701 15:29:18.062800 7869 hugectr.cc:785] Model '0_hugectr' -> the number of slots = 3
I0701 15:29:18.063198 7869 hugectr.cc:1078] Verifying model configuration: {
"name": "0_hugectr",
"platform": "",
"backend": "hugectr",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 64,
"input": [
{
"name": "DES",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "CATCOLUMN",
"data_type": "TYPE_INT64",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "ROWINDEX",
"data_type": "TYPE_INT32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "OUTPUT0",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"instance_group": [
{
"name": "0_hugectr_0",
"kind": "KIND_GPU",
"count": 1,
"gpus": [
0
],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"label_dim": {
"string_value": "1"
},
"max_nnz": {
"string_value": "2"
},
"gpucacheper": {
"string_value": "0.5"
},
"embedding_vector_size": {
"string_value": "16"
},
"des_feature_num": {
"string_value": "1"
},
"slot_sizes": {
"string_value": "[[64, 64, 64]]"
},
"gpucache": {
"string_value": "true"
},
"embeddingkey_long_type": {
"string_value": "true"
},
"slots": {
"string_value": "3"
},
"cat_feature_num": {
"string_value": "3"
},
"config": {
"string_value": "/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json"
}
},
"model_warmup": []
}
I0701 15:29:18.063227 7869 hugectr.cc:1164] The model configuration: {
"name": "0_hugectr",
"platform": "",
"backend": "hugectr",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 64,
"input": [
{
"name": "DES",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "CATCOLUMN",
"data_type": "TYPE_INT64",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "ROWINDEX",
"data_type": "TYPE_INT32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "OUTPUT0",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"instance_group": [
{
"name": "0_hugectr_0",
"kind": "KIND_GPU",
"count": 1,
"gpus": [
0
],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"label_dim": {
"string_value": "1"
},
"max_nnz": {
"string_value": "2"
},
"gpucacheper": {
"string_value": "0.5"
},
"embedding_vector_size": {
"string_value": "16"
},
"des_feature_num": {
"string_value": "1"
},
"slot_sizes": {
"string_value": "[[64, 64, 64]]"
},
"gpucache": {
"string_value": "true"
},
"embeddingkey_long_type": {
"string_value": "true"
},
"slots": {
"string_value": "3"
},
"cat_feature_num": {
"string_value": "3"
},
"config": {
"string_value": "/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json"
}
},
"model_warmup": []
}
I0701 15:29:18.063249 7869 hugectr.cc:1209] slots set = 3
I0701 15:29:18.063256 7869 hugectr.cc:1213] slots set = 3
I0701 15:29:18.063263 7869 hugectr.cc:1221] desene number = 1
I0701 15:29:18.063270 7869 hugectr.cc:1239] The max categorical feature number = 3
I0701 15:29:18.063278 7869 hugectr.cc:1244] embedding size = 16
I0701 15:29:18.063284 7869 hugectr.cc:1250] embedding size = 16
I0701 15:29:18.063291 7869 hugectr.cc:1256] maxnnz = 2
I0701 15:29:18.063299 7869 hugectr.cc:1265] refresh_interval = 0
I0701 15:29:18.063306 7869 hugectr.cc:1273] refresh_delay = 0
I0701 15:29:18.063313 7869 hugectr.cc:1281] HugeCTR model config path = /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json
I0701 15:29:18.063321 7869 hugectr.cc:1329] support mixed_precision = 0
I0701 15:29:18.063331 7869 hugectr.cc:1348] gpu cache per = 0.5
I0701 15:29:18.063339 7869 hugectr.cc:1366] hit-rate threshold = 0.9
I0701 15:29:18.063345 7869 hugectr.cc:1374] Label dim = 1
I0701 15:29:18.063353 7869 hugectr.cc:1383] support 64-bit embedding key = 1
I0701 15:29:18.063359 7869 hugectr.cc:1394] Model_Inference_Para.max_batchsize: 64
I0701 15:29:18.063372 7869 hugectr.cc:1398] max_batch_size in model config.pbtxt is 64
I0701 15:29:18.063380 7869 hugectr.cc:1468] ******Creating Embedding Cache for model 0_hugectr in device 0
I0701 15:29:18.063386 7869 hugectr.cc:1495] ******Creating Embedding Cache for model 0_hugectr successfully
I0701 15:29:18.063743 7869 hugectr.cc:1996] TRITONBACKEND_ModelInstanceInitialize: 0_hugectr_0 (device 0)
I0701 15:29:18.063755 7869 hugectr.cc:1637] Triton Model Instance Initialization on device 0
I0701 15:29:18.063762 7869 hugectr.cc:1647] Dense Feature buffer allocation:
I0701 15:29:18.074275 7869 hugectr.cc:1654] Categorical Feature buffer allocation:
I0701 15:29:18.074316 7869 hugectr.cc:1672] Categorical Row Index buffer allocation:
I0701 15:29:18.074330 7869 hugectr.cc:1680] Predict result buffer allocation:
I0701 15:29:18.074344 7869 hugectr.cc:2009] Loading HugeCTR Model
I0701 15:29:18.074351 7869 hugectr.cc:1698] The model origin json configuration file path is: /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json
I0701 15:29:18.847134 7869 hugectr.cc:1706] ******Loading HugeCTR model successfully
I0701 15:29:18.847304 7869 model_repository_manager.cc:1345] successfully loaded '0_hugectr' version 1
E0701 15:29:18.847379 7869 model_repository_manager.cc:1551] Invalid argument: ensemble 'ensemble_model' depends on '0_predicthugectr' which has no loaded version
I0701 15:29:18.847631 7869 server.cc:556]
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+
I0701 15:29:18.847702 7869 server.cc:583]
+---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Backend | Path | Config |
+---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} |
| hugectr | /opt/tritonserver/backends/hugectr/libtriton_hugectr.so | {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}} |
+---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0701 15:29:18.847764 7869 server.cc:626]
+------------------+---------+----------------------------------------------------------------------------------------------------------------------+
| Model | Version | Status |
+------------------+---------+----------------------------------------------------------------------------------------------------------------------+
| 0_hugectr | 1 | READY |
| 0_predicthugectr | 1 | UNAVAILABLE: Internal: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr' |
| | | |
| | | At: |
| | | (973): _find_and_load_unlocked |
| | | (991): _find_and_load |
| | | (1014): _gcd_import |
| | | /usr/lib/python3.8/importlib/init.py(127): import_module |
| | | /usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init |
| | | /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize |
+------------------+---------+----------------------------------------------------------------------------------------------------------------------+
I0701 15:29:18.875669 7869 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB
I0701 15:29:18.876569 7869 tritonserver.cc:2138]
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Option | Value |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| server_id | triton |
| server_version | 2.22.0 |
| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace |
| model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-7/test_predict_hugectr0/model_repository |
| model_control_mode | MODE_NONE |
| strict_model_config | 1 |
| rate_limit | OFF |
| pinned_memory_pool_byte_size | 268435456 |
| cuda_memory_pool_byte_size{0} | 67108864 |
| response_cache_byte_size | 0 |
| min_supported_compute_capability | 6.0 |
| strict_readiness | 1 |
| exit_timeout | 30 |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0701 15:29:18.876602 7869 server.cc:257] Waiting for in-flight requests to complete.
I0701 15:29:18.876611 7869 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences
I0701 15:29:18.876621 7869 model_repository_manager.cc:1223] unloading: 0_hugectr:1
I0701 15:29:18.876657 7869 server.cc:288] All models are stopped, unloading models
I0701 15:29:18.876665 7869 server.cc:295] Timeout 30: Found 1 live models and 0 in-flight non-inference requests
I0701 15:29:18.876749 7869 hugectr.cc:2026] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0701 15:29:18.890774 7869 hugectr.cc:1957] TRITONBACKEND_ModelFinalize: delete model state
I0701 15:29:18.899492 7869 hugectr.cc:1505] ******Destorying Embedding Cache for model 0_hugectr successfully
I0701 15:29:18.899532 7869 model_repository_manager.cc:1328] successfully unloaded '0_hugectr' version 1
I0701 15:29:19.876698 7869 server.cc:295] Timeout 29: Found 0 live models and 0 in-flight non-inference requests
I0701 15:29:19.876772 7869 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend
W0701 15:29:19.995394 7869 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0701 15:29:19.995462 7869 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
error: creating server: Internal - failed to load all models
W0701 15:29:20.995623 7869 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0701 15:29:20.995678 7869 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl]
/usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute n_features_ was deprecated in version 1.0 and will be removed in 1.2. Use n_features_in_ instead.
warnings.warn(msg, category=FutureWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti...
FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run...
======= 2 failed, 46 passed, 1 skipped, 18 warnings in 173.06s (0:02:53) =======
Build step 'Execute shell' marked build as failure
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins4678809899471159306.sh
Click to view CI Results
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts.
Running as SYSTEM
Setting status of 088570474e008fa0580cb7ae6de1c4a2bceadf4e to PENDING with url https://10.20.13.93:8080/job/merlin_systems/121/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 088570474e008fa0580cb7ae6de1c4a2bceadf4e^{commit} # timeout=10
Checking out Revision 088570474e008fa0580cb7ae6de1c4a2bceadf4e (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10
Commit message: "added check for categorical columns in constructor"
> git rev-list --no-walk 80521b272bf84315d24b2f8fb94a28011e4aedf3 # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins16330791337592217810.sh
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 50 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py .... [ 10%]
tests/unit/systems/test_ensemble_ops.py .. [ 14%]
tests/unit/systems/test_export.py . [ 16%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 30%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%]
tests/unit/systems/fil/test_fil.py .......................... [ 88%]
tests/unit/systems/fil/test_forest.py F.. [ 94%]
tests/unit/systems/hugectr/test_hugectr.py sF. [100%]
=================================== FAILURES ===================================
____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-8/test_load_from_config0')
def test_load_from_config(tmpdir):
rows = 200
num_features = 16
X, y = sklearn.datasets.make_regression(
n_samples=rows,
n_features=num_features,
n_informative=num_features // 3,
random_state=0,
)
model = xgboost.XGBRegressor()
model.fit(X, y)
feature_names = [str(i) for i in range(num_features)]
input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names])
output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)])
config = PredictForest(model, input_schema).export(
tmpdir, input_schema, output_schema, node_id=2
)
node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False}
}
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}}
E Differing items:
E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}}
E Full diff:
E {
E 'output__0': {'dtype': 'float32',
E 'is_list': False,
E - 'is_ragged': False},
E ? -
E + 'is_ragged': False,
E + 'tags': []},
E }
tests/unit/systems/fil/test_forest.py:57: AssertionError
_____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0')
def test_predict_hugectr(tmpdir):
cat_dtypes = {"a": int, "b": int, "c": int}
categorical_columns = ["a", "b", "c"]
gdf = make_df(
{
"a": np.arange(64, dtype=np.int64),
"b": np.arange(64, dtype=np.int64),
"c": np.arange(64, dtype=np.int64),
"d": np.random.rand(64).tolist(),
"label": [0] * 64,
},
)
gdf["label"] = gdf["label"].astype("float32")
gdf["d"] = gdf["d"].astype("float32")
train_dataset = nvt.Dataset(gdf)
dense_columns = ["d"]
dict_dtypes = {}
col_schemas = train_dataset.schema.column_schemas
for col in dense_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS)
dict_dtypes[col] = np.float32
for col in categorical_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL)
dict_dtypes[col] = np.int64
for col in ["label"]:
col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET)
dict_dtypes[col] = np.float32
train_path = os.path.join(tmpdir, "train/")
os.mkdir(train_path)
train_dataset.to_parquet(
output_path=train_path,
shuffle=nvt.io.Shuffle.PER_PARTITION,
cats=categorical_columns,
conts=dense_columns,
labels=["label"],
dtypes=dict_dtypes,
)
embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)}
total_cardinality = 0
slot_sizes = []
for column in cat_dtypes:
slot_sizes.append(embeddings[column][0])
total_cardinality += embeddings[column][0]
# slot sizes = list of caridinalities per column, total is sum of individual
model = _run_model(slot_sizes, train_path, len(dense_columns))
model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0])
model_repository_path = os.path.join(tmpdir, "model_repository")
input_schema = train_dataset.schema
triton_chain = input_schema.column_names >> model_op
ens = Ensemble(triton_chain, input_schema)
os.makedirs(model_repository_path)
enc_config, node_configs = ens.export(model_repository_path)
assert enc_config
assert len(node_configs) == 1
assert node_configs[0].name == "0_predicthugectr"
df = train_dataset.to_ddf().compute()[:5]
dense, cats, rowptr = _convert(df, slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path,
["OUTPUT0"],
df,
"ensemble_model",
backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json",
)
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver
with run_triton_server(tmpdir, backend_config=backend_config) as client:
/usr/lib/python3.8/contextlib.py:113: in enter
return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository'
backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager
def run_triton_server(modelpath, backend_config="tensorflow,version=2"):
"""This function starts up a Triton server instance and returns a client to it.
Parameters
----------
modelpath : string
The path to the model to load.
Yields
------
client: tritonclient.InferenceServerClient
The client connected to the Triton server.
"""
cmdline = [
TRITON_SERVER_PATH,
"--model-repository",
modelpath,
f"--backend-config={backend_config}",
]
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = "0"
with subprocess.Popen(cmdline, env=env) as process:
try:
with grpcclient.InferenceServerClient("localhost:8001") as client:
# wait until server is ready
for _ in range(60):
if process.poll() is not None:
retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError
----------------------------- Captured stdout call -----------------------------
HugeCTR Version: 3.7
====================================================Model Init=====================================================
[HCTR][15:39:34.401][WARNING][RK0][main]: The model name is not specified when creating the solver.
[HCTR][15:39:34.401][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled.
[HCTR][15:39:34.401][INFO][RK0][main]: Global seed is 511996212
[HCTR][15:39:34.444][INFO][RK0][main]: Device to NUMA mapping:
GPU 0 -> node 0
[HCTR][15:39:35.021][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled.
[HCTR][15:39:35.021][INFO][RK0][main]: Start all2all warmup
[HCTR][15:39:35.021][INFO][RK0][main]: End all2all warmup
[HCTR][15:39:35.022][INFO][RK0][main]: Using All-reduce algorithm: NCCL
[HCTR][15:39:35.022][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB
[HCTR][15:39:35.022][INFO][RK0][main]: num of DataReader workers: 1
[HCTR][15:39:35.023][INFO][RK0][main]: Vocabulary size: 0
[HCTR][15:39:35.023][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362
[HCTR][15:39:35.023][DEBUG][RK0][tid #139698357917440]: file_name_ /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][15:39:35.024][DEBUG][RK0][tid #139698349524736]: file_name_ /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][15:39:35.025][INFO][RK0][main]: Graph analysis to resolve tensor dependency
===================================================Model Compile===================================================
[HCTR][15:39:35.316][INFO][RK0][main]: gpu0 start to init embedding
[HCTR][15:39:35.317][INFO][RK0][main]: gpu0 init embedding done
[HCTR][15:39:35.318][INFO][RK0][main]: Starting AUC NCCL warm-up
[HCTR][15:39:35.319][INFO][RK0][main]: Warm-up done
===================================================Model Summary===================================================
[HCTR][15:39:35.319][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Layer Type Input Name Output Name Output Shape
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)
InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1
BinaryCrossEntropyLoss fc2 loss
label
=====================================================Model Fit=====================================================
[HCTR][15:39:35.319][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20
[HCTR][15:39:35.319][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10
[HCTR][15:39:35.319][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10
[HCTR][15:39:35.319][INFO][RK0][main]: Dense network trainable: True
[HCTR][15:39:35.319][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True
[HCTR][15:39:35.319][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True
[HCTR][15:39:35.319][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000
[HCTR][15:39:35.319][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000
[HCTR][15:39:35.319][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/file_list.txt
[HCTR][15:39:35.319][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/file_list.txt
[HCTR][15:39:35.324][DEBUG][RK0][tid #139698357917440]: file_name /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows 64
[HCTR][15:39:35.329][DEBUG][RK0][tid #139698357917440]: file_name_ /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][15:39:35.333][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][15:39:35.333][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][15:39:35.358][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][15:39:35.394][INFO][RK0][main]: Done
[HCTR][15:39:35.413][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][15:39:35.452][INFO][RK0][main]: Done
[HCTR][15:39:35.453][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][15:39:35.453][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][15:39:35.454][INFO][RK0][main]: Dumping dense optimizer states to file, successful
[HCTR][15:39:35.459][DEBUG][RK0][tid #139698357917440]: file_name_ /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][15:39:35.461][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s.
[HCTR][15:39:35.463][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully
[HCTR][15:39:35.464][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][15:39:35.464][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][15:39:35.482][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][15:39:35.517][INFO][RK0][main]: Done
[HCTR][15:39:35.536][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][15:39:35.574][INFO][RK0][main]: Done
[HCTR][15:39:35.575][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][15:39:35.575][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][15:39:35.575][INFO][RK0][main]: Dumping dense optimizer states to file, successful
[HCTR][15:39:38.463][INFO][RK0][main]: default_emb_vec_value is not specified using default: 0
[HCTR][15:39:38.464][INFO][RK0][main]: Creating HashMap CPU database backend...
[HCTR][15:39:38.464][INFO][RK0][main]: Volatile DB: initial cache rate = 1
[HCTR][15:39:38.464][INFO][RK0][main]: Volatile DB: cache missed embeddings = 0
[HCTR][15:39:38.722][INFO][RK0][main]: Table: hps_et.0_hugectr.sparse_embedding1; cached 64 / 64 embeddings in volatile database (PreallocatedHashMapBackend); load: 64 / 18446744073709551615 (0.00%).
[HCTR][15:39:38.722][DEBUG][RK0][main]: Real-time subscribers created!
[HCTR][15:39:38.722][INFO][RK0][main]: Create embedding cache in device 0.
[HCTR][15:39:38.723][INFO][RK0][main]: Use GPU embedding cache: True, cache size percentage: 0.500000
[HCTR][15:39:38.723][INFO][RK0][main]: Configured cache hit rate threshold: 0.900000
[HCTR][15:39:38.723][INFO][RK0][main]: The size of thread pool: 16
[HCTR][15:39:38.723][INFO][RK0][main]: The size of worker memory pool: 4
[HCTR][15:39:38.723][INFO][RK0][main]: The size of refresh memory pool: 1
[HCTR][15:39:38.739][INFO][RK0][main]: Global seed is 450741369
[HCTR][15:39:39.354][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled.
[HCTR][15:39:39.354][INFO][RK0][main]: Start all2all warmup
[HCTR][15:39:39.354][INFO][RK0][main]: End all2all warmup
[HCTR][15:39:39.355][INFO][RK0][main]: Create inference session on device: 0
[HCTR][15:39:39.355][INFO][RK0][main]: Model name: 0_hugectr
[HCTR][15:39:39.355][INFO][RK0][main]: Use mixed precision: False
[HCTR][15:39:39.355][INFO][RK0][main]: Use cuda graph: True
[HCTR][15:39:39.355][INFO][RK0][main]: Max batchsize: 64
[HCTR][15:39:39.355][INFO][RK0][main]: Use I64 input key: True
[HCTR][15:39:39.355][INFO][RK0][main]: start create embedding for inference
[HCTR][15:39:39.355][INFO][RK0][main]: sparse_input name data1
[HCTR][15:39:39.355][INFO][RK0][main]: create embedding for inference success
[HCTR][15:39:39.355][INFO][RK0][main]: Inference stage skip BinaryCrossEntropyLoss layer, replaced by Sigmoid layer
----------------------------- Captured stderr call -----------------------------
I0701 15:39:35.862152 9240 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7fe326000000' with size 268435456
I0701 15:39:35.862903 9240 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864
I0701 15:39:35.866095 9240 model_repository_manager.cc:1191] loading: 0_predicthugectr:1
I0701 15:39:35.966416 9240 model_repository_manager.cc:1191] loading: 0_hugectr:1
I0701 15:39:35.971023 9240 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0)
0701 15:39:38.049032 9280 pb_stub.cc:301] Failed to initialize Python stub: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr'
At:
(973): _find_and_load_unlocked
(991): _find_and_load
(1014): _gcd_import
/usr/lib/python3.8/importlib/init.py(127): import_module
/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init
/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize
E0701 15:39:38.437315 9240 model_repository_manager.cc:1348] failed to load '0_predicthugectr' version 1: Internal: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr'
At:
(973): _find_and_load_unlocked
(991): _find_and_load
(1014): _gcd_import
/usr/lib/python3.8/importlib/init.py(127): import_module
/usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init
/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize
I0701 15:39:38.463504 9240 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr
I0701 15:39:38.463531 9240 hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9
I0701 15:39:38.463540 9240 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.9
I0701 15:39:38.463547 9240 hugectr.cc:1772] The HugeCTR backend Repository location: /opt/tritonserver/backends/hugectr
I0701 15:39:38.463555 9240 hugectr.cc:1781] The HugeCTR backend configuration: {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}}
I0701 15:39:38.463580 9240 hugectr.cc:345] *****Parsing Parameter Server Configuration from /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json
I0701 15:39:38.463631 9240 hugectr.cc:366] Support 64-bit keys = 1
I0701 15:39:38.463665 9240 hugectr.cc:591] Model name = 0_hugectr
I0701 15:39:38.463673 9240 hugectr.cc:600] Model '0_hugectr' -> network file = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json
I0701 15:39:38.463681 9240 hugectr.cc:607] Model '0_hugectr' -> max. batch size = 64
I0701 15:39:38.463687 9240 hugectr.cc:613] Model '0_hugectr' -> dense model file = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/_dense_0.model
I0701 15:39:38.463698 9240 hugectr.cc:619] Model '0_hugectr' -> sparse model files = [/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_sparse_0.model]
I0701 15:39:38.463705 9240 hugectr.cc:630] Model '0_hugectr' -> use GPU embedding cache = 1
I0701 15:39:38.463721 9240 hugectr.cc:639] Model '0_hugectr' -> hit rate threshold = 0.9
I0701 15:39:38.463728 9240 hugectr.cc:647] Model '0_hugectr' -> per model GPU cache = 0.5
I0701 15:39:38.463745 9240 hugectr.cc:664] Model '0_hugectr' -> use_mixed_precision = 0
I0701 15:39:38.463753 9240 hugectr.cc:671] Model '0_hugectr' -> scaler = 1
I0701 15:39:38.463759 9240 hugectr.cc:677] Model '0_hugectr' -> use_algorithm_search = 1
I0701 15:39:38.463765 9240 hugectr.cc:685] Model '0_hugectr' -> use_cuda_graph = 1
I0701 15:39:38.463772 9240 hugectr.cc:692] Model '0_hugectr' -> num. pool worker buffers = 4
I0701 15:39:38.463779 9240 hugectr.cc:700] Model '0_hugectr' -> num. pool refresh buffers = 1
I0701 15:39:38.463808 9240 hugectr.cc:708] Model '0_hugectr' -> cache refresh rate per iteration = 0.2
I0701 15:39:38.463817 9240 hugectr.cc:717] Model '0_hugectr' -> deployed device list = [0]
I0701 15:39:38.463825 9240 hugectr.cc:725] Model '0_hugectr' -> default value for each table = [0]
I0701 15:39:38.463831 9240 hugectr.cc:733] Model '0_hugectr' -> maxnum_des_feature_per_sample = 1
I0701 15:39:38.463838 9240 hugectr.cc:741] Model '0_hugectr' -> refresh_delay = 0
I0701 15:39:38.463844 9240 hugectr.cc:747] Model '0_hugectr' -> refresh_interval = 0
I0701 15:39:38.463852 9240 hugectr.cc:755] Model '0_hugectr' -> maxnum_catfeature_query_per_table_per_sample list = [3]
I0701 15:39:38.463859 9240 hugectr.cc:766] Model '0_hugectr' -> embedding_vecsize_per_table list = [16]
I0701 15:39:38.463867 9240 hugectr.cc:773] Model '0_hugectr' -> embedding model names = [, sparse_embedding1]
I0701 15:39:38.463873 9240 hugectr.cc:780] Model '0_hugectr' -> label_dim = 1
I0701 15:39:38.463880 9240 hugectr.cc:785] Model '0_hugectr' -> the number of slots = 3
I0701 15:39:38.463889 9240 hugectr.cc:806] *****The HugeCTR Backend Parameter Server is creating... *****
I0701 15:39:38.464027 9240 hugectr.cc:814] ***** Parameter Server(Int64) is creating... *****
I0701 15:39:38.727743 9240 hugectr.cc:825] *****The HugeCTR Backend Backend created the Parameter Server successfully! *****
I0701 15:39:38.727792 9240 hugectr.cc:1844] TRITONBACKEND_ModelInitialize: 0_hugectr (version 1)
I0701 15:39:38.727799 9240 hugectr.cc:1857] Repository location: /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr
I0701 15:39:38.727807 9240 hugectr.cc:1872] backend configuration in mode: {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}}
I0701 15:39:38.727817 9240 hugectr.cc:1888] Parsing the latest Parameter Server json config file for deploying model 0_hugectr online
I0701 15:39:38.727823 9240 hugectr.cc:1893] Hierarchical PS version is 0 and the current Model Version is 1
I0701 15:39:38.727829 9240 hugectr.cc:345] *****Parsing Parameter Server Configuration from /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json
I0701 15:39:38.727872 9240 hugectr.cc:366] Support 64-bit keys = 1
I0701 15:39:38.727891 9240 hugectr.cc:591] Model name = 0_hugectr
I0701 15:39:38.727899 9240 hugectr.cc:600] Model '0_hugectr' -> network file = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json
I0701 15:39:38.727906 9240 hugectr.cc:607] Model '0_hugectr' -> max. batch size = 64
I0701 15:39:38.727912 9240 hugectr.cc:613] Model '0_hugectr' -> dense model file = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/_dense_0.model
I0701 15:39:38.727920 9240 hugectr.cc:619] Model '0_hugectr' -> sparse model files = [/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_sparse_0.model]
I0701 15:39:38.727926 9240 hugectr.cc:630] Model '0_hugectr' -> use GPU embedding cache = 1
I0701 15:39:38.727936 9240 hugectr.cc:639] Model '0_hugectr' -> hit rate threshold = 0.9
I0701 15:39:38.727943 9240 hugectr.cc:647] Model '0_hugectr' -> per model GPU cache = 0.5
I0701 15:39:38.727955 9240 hugectr.cc:664] Model '0_hugectr' -> use_mixed_precision = 0
I0701 15:39:38.727963 9240 hugectr.cc:671] Model '0_hugectr' -> scaler = 1
I0701 15:39:38.727969 9240 hugectr.cc:677] Model '0_hugectr' -> use_algorithm_search = 1
I0701 15:39:38.727974 9240 hugectr.cc:685] Model '0_hugectr' -> use_cuda_graph = 1
I0701 15:39:38.727981 9240 hugectr.cc:692] Model '0_hugectr' -> num. pool worker buffers = 4
I0701 15:39:38.727987 9240 hugectr.cc:700] Model '0_hugectr' -> num. pool refresh buffers = 1
I0701 15:39:38.727994 9240 hugectr.cc:708] Model '0_hugectr' -> cache refresh rate per iteration = 0.2
I0701 15:39:38.728002 9240 hugectr.cc:717] Model '0_hugectr' -> deployed device list = [0]
I0701 15:39:38.728028 9240 hugectr.cc:725] Model '0_hugectr' -> default value for each table = [0]
I0701 15:39:38.728034 9240 hugectr.cc:733] Model '0_hugectr' -> maxnum_des_feature_per_sample = 1
I0701 15:39:38.728041 9240 hugectr.cc:741] Model '0_hugectr' -> refresh_delay = 0
I0701 15:39:38.728047 9240 hugectr.cc:747] Model '0_hugectr' -> refresh_interval = 0
I0701 15:39:38.728054 9240 hugectr.cc:755] Model '0_hugectr' -> maxnum_catfeature_query_per_table_per_sample list = [3]
I0701 15:39:38.728061 9240 hugectr.cc:766] Model '0_hugectr' -> embedding_vecsize_per_table list = [16]
I0701 15:39:38.728068 9240 hugectr.cc:773] Model '0_hugectr' -> embedding model names = [, sparse_embedding1]
I0701 15:39:38.728074 9240 hugectr.cc:780] Model '0_hugectr' -> label_dim = 1
I0701 15:39:38.728080 9240 hugectr.cc:785] Model '0_hugectr' -> the number of slots = 3
I0701 15:39:38.728462 9240 hugectr.cc:1078] Verifying model configuration: {
"name": "0_hugectr",
"platform": "",
"backend": "hugectr",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 64,
"input": [
{
"name": "DES",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "CATCOLUMN",
"data_type": "TYPE_INT64",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "ROWINDEX",
"data_type": "TYPE_INT32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "OUTPUT0",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"instance_group": [
{
"name": "0_hugectr_0",
"kind": "KIND_GPU",
"count": 1,
"gpus": [
0
],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"cat_feature_num": {
"string_value": "3"
},
"config": {
"string_value": "/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json"
},
"label_dim": {
"string_value": "1"
},
"max_nnz": {
"string_value": "2"
},
"embedding_vector_size": {
"string_value": "16"
},
"gpucacheper": {
"string_value": "0.5"
},
"des_feature_num": {
"string_value": "1"
},
"slot_sizes": {
"string_value": "[[64, 64, 64]]"
},
"embeddingkey_long_type": {
"string_value": "true"
},
"gpucache": {
"string_value": "true"
},
"slots": {
"string_value": "3"
}
},
"model_warmup": []
}
I0701 15:39:38.728498 9240 hugectr.cc:1164] The model configuration: {
"name": "0_hugectr",
"platform": "",
"backend": "hugectr",
"version_policy": {
"latest": {
"num_versions": 1
}
},
"max_batch_size": 64,
"input": [
{
"name": "DES",
"data_type": "TYPE_FP32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "CATCOLUMN",
"data_type": "TYPE_INT64",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
},
{
"name": "ROWINDEX",
"data_type": "TYPE_INT32",
"format": "FORMAT_NONE",
"dims": [
-1
],
"is_shape_tensor": false,
"allow_ragged_batch": false,
"optional": false
}
],
"output": [
{
"name": "OUTPUT0",
"data_type": "TYPE_FP32",
"dims": [
-1
],
"label_filename": "",
"is_shape_tensor": false
}
],
"batch_input": [],
"batch_output": [],
"optimization": {
"priority": "PRIORITY_DEFAULT",
"input_pinned_memory": {
"enable": true
},
"output_pinned_memory": {
"enable": true
},
"gather_kernel_buffer_threshold": 0,
"eager_batching": false
},
"instance_group": [
{
"name": "0_hugectr_0",
"kind": "KIND_GPU",
"count": 1,
"gpus": [
0
],
"secondary_devices": [],
"profile": [],
"passive": false,
"host_policy": ""
}
],
"default_model_filename": "",
"cc_model_filenames": {},
"metric_tags": {},
"parameters": {
"cat_feature_num": {
"string_value": "3"
},
"config": {
"string_value": "/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json"
},
"label_dim": {
"string_value": "1"
},
"max_nnz": {
"string_value": "2"
},
"embedding_vector_size": {
"string_value": "16"
},
"gpucacheper": {
"string_value": "0.5"
},
"des_feature_num": {
"string_value": "1"
},
"slot_sizes": {
"string_value": "[[64, 64, 64]]"
},
"embeddingkey_long_type": {
"string_value": "true"
},
"gpucache": {
"string_value": "true"
},
"slots": {
"string_value": "3"
}
},
"model_warmup": []
}
I0701 15:39:38.728519 9240 hugectr.cc:1209] slots set = 3
I0701 15:39:38.728525 9240 hugectr.cc:1213] slots set = 3
I0701 15:39:38.728531 9240 hugectr.cc:1221] desene number = 1
I0701 15:39:38.728538 9240 hugectr.cc:1239] The max categorical feature number = 3
I0701 15:39:38.728544 9240 hugectr.cc:1244] embedding size = 16
I0701 15:39:38.728550 9240 hugectr.cc:1250] embedding size = 16
I0701 15:39:38.728556 9240 hugectr.cc:1256] maxnnz = 2
I0701 15:39:38.728564 9240 hugectr.cc:1265] refresh_interval = 0
I0701 15:39:38.728570 9240 hugectr.cc:1273] refresh_delay = 0
I0701 15:39:38.728576 9240 hugectr.cc:1281] HugeCTR model config path = /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json
I0701 15:39:38.728584 9240 hugectr.cc:1329] support mixed_precision = 0
I0701 15:39:38.728593 9240 hugectr.cc:1348] gpu cache per = 0.5
I0701 15:39:38.728600 9240 hugectr.cc:1366] hit-rate threshold = 0.9
I0701 15:39:38.728606 9240 hugectr.cc:1374] Label dim = 1
I0701 15:39:38.728612 9240 hugectr.cc:1383] support 64-bit embedding key = 1
I0701 15:39:38.728618 9240 hugectr.cc:1394] Model_Inference_Para.max_batchsize: 64
I0701 15:39:38.728631 9240 hugectr.cc:1398] max_batch_size in model config.pbtxt is 64
I0701 15:39:38.728638 9240 hugectr.cc:1468] ******Creating Embedding Cache for model 0_hugectr in device 0
I0701 15:39:38.728644 9240 hugectr.cc:1495] ******Creating Embedding Cache for model 0_hugectr successfully
I0701 15:39:38.729005 9240 hugectr.cc:1996] TRITONBACKEND_ModelInstanceInitialize: 0_hugectr_0 (device 0)
I0701 15:39:38.729018 9240 hugectr.cc:1637] Triton Model Instance Initialization on device 0
I0701 15:39:38.729025 9240 hugectr.cc:1647] Dense Feature buffer allocation:
I0701 15:39:38.738926 9240 hugectr.cc:1654] Categorical Feature buffer allocation:
I0701 15:39:38.738962 9240 hugectr.cc:1672] Categorical Row Index buffer allocation:
I0701 15:39:38.738975 9240 hugectr.cc:1680] Predict result buffer allocation:
I0701 15:39:38.738987 9240 hugectr.cc:2009] Loading HugeCTR Model
I0701 15:39:38.738994 9240 hugectr.cc:1698] The model origin json configuration file path is: /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json
I0701 15:39:39.502476 9240 hugectr.cc:1706] ******Loading HugeCTR model successfully
I0701 15:39:39.502645 9240 model_repository_manager.cc:1345] successfully loaded '0_hugectr' version 1
E0701 15:39:39.502711 9240 model_repository_manager.cc:1551] Invalid argument: ensemble 'ensemble_model' depends on '0_predicthugectr' which has no loaded version
I0701 15:39:39.502877 9240 server.cc:556]
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+
I0701 15:39:39.503064 9240 server.cc:583]
+---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Backend | Path | Config |
+---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} |
| hugectr | /opt/tritonserver/backends/hugectr/libtriton_hugectr.so | {"cmdline":{"auto-complete-config":"false","backend-directory":"/opt/tritonserver/backends","min-compute-capability":"6.000000","ps":"/tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/ps.json","default-max-batch-size":"4"}} |
+---------+---------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0701 15:39:39.503207 9240 server.cc:626]
+------------------+---------+----------------------------------------------------------------------------------------------------------------------+
| Model | Version | Status |
+------------------+---------+----------------------------------------------------------------------------------------------------------------------+
| 0_hugectr | 1 | READY |
| 0_predicthugectr | 1 | UNAVAILABLE: Internal: ModuleNotFoundError: No module named 'merlin.systems.dag.ops.hugectr' |
| | | |
| | | At: |
| | | (973): _find_and_load_unlocked |
| | | (991): _find_and_load |
| | | (1014): _gcd_import |
| | | /usr/lib/python3.8/importlib/init.py(127): import_module |
| | | /usr/local/lib/python3.8/dist-packages/merlin/systems/dag/op_runner.py(30): init |
| | | /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository/0_predicthugectr/1/model.py(66): initialize |
+------------------+---------+----------------------------------------------------------------------------------------------------------------------+
I0701 15:39:39.536239 9240 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB
I0701 15:39:39.537139 9240 tritonserver.cc:2138]
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Option | Value |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| server_id | triton |
| server_version | 2.22.0 |
| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace |
| model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-8/test_predict_hugectr0/model_repository |
| model_control_mode | MODE_NONE |
| strict_model_config | 1 |
| rate_limit | OFF |
| pinned_memory_pool_byte_size | 268435456 |
| cuda_memory_pool_byte_size{0} | 67108864 |
| response_cache_byte_size | 0 |
| min_supported_compute_capability | 6.0 |
| strict_readiness | 1 |
| exit_timeout | 30 |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0701 15:39:39.537176 9240 server.cc:257] Waiting for in-flight requests to complete.
I0701 15:39:39.537185 9240 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences
I0701 15:39:39.537195 9240 model_repository_manager.cc:1223] unloading: 0_hugectr:1
I0701 15:39:39.537236 9240 server.cc:288] All models are stopped, unloading models
I0701 15:39:39.537244 9240 server.cc:295] Timeout 30: Found 1 live models and 0 in-flight non-inference requests
I0701 15:39:39.537308 9240 hugectr.cc:2026] TRITONBACKEND_ModelInstanceFinalize: delete instance state
I0701 15:39:39.558124 9240 hugectr.cc:1957] TRITONBACKEND_ModelFinalize: delete model state
I0701 15:39:39.559197 9240 hugectr.cc:1505] ******Destorying Embedding Cache for model 0_hugectr successfully
I0701 15:39:39.559252 9240 model_repository_manager.cc:1328] successfully unloaded '0_hugectr' version 1
I0701 15:39:40.537317 9240 server.cc:295] Timeout 29: Found 0 live models and 0 in-flight non-inference requests
I0701 15:39:40.537386 9240 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend
W0701 15:39:40.627541 9240 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0701 15:39:40.627609 9240 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
error: creating server: Internal - failed to load all models
W0701 15:39:41.628776 9240 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0701 15:39:41.628831 9240 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl]
/usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute n_features_ was deprecated in version 1.0 and will be removed in 1.2. Use n_features_in_ instead.
warnings.warn(msg, category=FutureWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti...
FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run...
======= 2 failed, 47 passed, 1 skipped, 18 warnings in 168.75s (0:02:48) =======
Build step 'Execute shell' marked build as failure
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins1120361343703709016.sh
rerun tests
Click to view CI Results
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts.
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts.
Running as SYSTEM
Setting status of 088570474e008fa0580cb7ae6de1c4a2bceadf4e to PENDING with url https://10.20.13.93:8080/job/merlin_systems/122/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 088570474e008fa0580cb7ae6de1c4a2bceadf4e^{commit} # timeout=10
Checking out Revision 088570474e008fa0580cb7ae6de1c4a2bceadf4e (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10
Commit message: "added check for categorical columns in constructor"
> git rev-list --no-walk 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins14042309132201631493.sh
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 50 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py .... [ 10%]
tests/unit/systems/test_ensemble_ops.py .. [ 14%]
tests/unit/systems/test_export.py . [ 16%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 30%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%]
tests/unit/systems/fil/test_fil.py .......................... [ 88%]
tests/unit/systems/fil/test_forest.py F.. [ 94%]
tests/unit/systems/hugectr/test_hugectr.py sF. [100%]
=================================== FAILURES ===================================
____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-4/test_load_from_config0')
def test_load_from_config(tmpdir):
rows = 200
num_features = 16
X, y = sklearn.datasets.make_regression(
n_samples=rows,
n_features=num_features,
n_informative=num_features // 3,
random_state=0,
)
model = xgboost.XGBRegressor()
model.fit(X, y)
feature_names = [str(i) for i in range(num_features)]
input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names])
output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)])
config = PredictForest(model, input_schema).export(
tmpdir, input_schema, output_schema, node_id=2
)
node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False}
}
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}}
E Differing items:
E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}}
E Full diff:
E {
E 'output__0': {'dtype': 'float32',
E 'is_list': False,
E - 'is_ragged': False},
E ? -
E + 'is_ragged': False,
E + 'tags': []},
E }
tests/unit/systems/fil/test_forest.py:57: AssertionError
_____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0')
def test_predict_hugectr(tmpdir):
cat_dtypes = {"a": int, "b": int, "c": int}
categorical_columns = ["a", "b", "c"]
gdf = make_df(
{
"a": np.arange(64, dtype=np.int64),
"b": np.arange(64, dtype=np.int64),
"c": np.arange(64, dtype=np.int64),
"d": np.random.rand(64).tolist(),
"label": [0] * 64,
},
)
gdf["label"] = gdf["label"].astype("float32")
gdf["d"] = gdf["d"].astype("float32")
train_dataset = nvt.Dataset(gdf)
dense_columns = ["d"]
dict_dtypes = {}
col_schemas = train_dataset.schema.column_schemas
for col in dense_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS)
dict_dtypes[col] = np.float32
for col in categorical_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL)
dict_dtypes[col] = np.int64
for col in ["label"]:
col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET)
dict_dtypes[col] = np.float32
train_path = os.path.join(tmpdir, "train/")
os.mkdir(train_path)
train_dataset.to_parquet(
output_path=train_path,
shuffle=nvt.io.Shuffle.PER_PARTITION,
cats=categorical_columns,
conts=dense_columns,
labels=["label"],
dtypes=dict_dtypes,
)
embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)}
total_cardinality = 0
slot_sizes = []
for column in cat_dtypes:
slot_sizes.append(embeddings[column][0])
total_cardinality += embeddings[column][0]
# slot sizes = list of caridinalities per column, total is sum of individual
model = _run_model(slot_sizes, train_path, len(dense_columns))
model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0])
model_repository_path = os.path.join(tmpdir, "model_repository")
input_schema = train_dataset.schema
triton_chain = input_schema.column_names >> model_op
ens = Ensemble(triton_chain, input_schema)
os.makedirs(model_repository_path)
enc_config, node_configs = ens.export(model_repository_path)
assert enc_config
assert len(node_configs) == 1
assert node_configs[0].name == "0_predicthugectr"
df = train_dataset.to_ddf().compute()[:5]
dense, cats, rowptr = _convert(df, slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path,
["OUTPUT0"],
df,
"ensemble_model",
backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json",
)
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver
with run_triton_server(tmpdir, backend_config=backend_config) as client:
/usr/lib/python3.8/contextlib.py:113: in enter
return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/model_repository'
backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager
def run_triton_server(modelpath, backend_config="tensorflow,version=2"):
"""This function starts up a Triton server instance and returns a client to it.
Parameters
----------
modelpath : string
The path to the model to load.
Yields
------
client: tritonclient.InferenceServerClient
The client connected to the Triton server.
"""
cmdline = [
TRITON_SERVER_PATH,
"--model-repository",
modelpath,
f"--backend-config={backend_config}",
]
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = "0"
with subprocess.Popen(cmdline, env=env) as process:
try:
with grpcclient.InferenceServerClient("localhost:8001") as client:
# wait until server is ready
for _ in range(60):
if process.poll() is not None:
retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError
----------------------------- Captured stdout call -----------------------------
HugeCTR Version: 3.7
====================================================Model Init=====================================================
[HCTR][12:32:44.702][WARNING][RK0][main]: The model name is not specified when creating the solver.
[HCTR][12:32:44.702][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled.
[HCTR][12:32:44.702][INFO][RK0][main]: Global seed is 500188220
[HCTR][12:32:44.742][INFO][RK0][main]: Device to NUMA mapping:
GPU 0 -> node 0
[HCTR][12:32:45.303][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled.
[HCTR][12:32:45.303][INFO][RK0][main]: Start all2all warmup
[HCTR][12:32:45.303][INFO][RK0][main]: End all2all warmup
[HCTR][12:32:45.304][INFO][RK0][main]: Using All-reduce algorithm: NCCL
[HCTR][12:32:45.304][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB
[HCTR][12:32:45.304][INFO][RK0][main]: num of DataReader workers: 1
[HCTR][12:32:45.304][INFO][RK0][main]: Vocabulary size: 0
[HCTR][12:32:45.304][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362
[HCTR][12:32:45.305][DEBUG][RK0][tid #140647730231040]: file_name_ /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:32:45.305][DEBUG][RK0][tid #140647721838336]: file_name_ /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:32:45.306][INFO][RK0][main]: Graph analysis to resolve tensor dependency
===================================================Model Compile===================================================
[HCTR][12:32:45.599][INFO][RK0][main]: gpu0 start to init embedding
[HCTR][12:32:45.600][INFO][RK0][main]: gpu0 init embedding done
[HCTR][12:32:45.601][INFO][RK0][main]: Starting AUC NCCL warm-up
[HCTR][12:32:45.602][INFO][RK0][main]: Warm-up done
===================================================Model Summary===================================================
[HCTR][12:32:45.602][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Layer Type Input Name Output Name Output Shape
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)
InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1
BinaryCrossEntropyLoss fc2 loss
label
=====================================================Model Fit=====================================================
[HCTR][12:32:45.602][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20
[HCTR][12:32:45.602][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10
[HCTR][12:32:45.602][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10
[HCTR][12:32:45.602][INFO][RK0][main]: Dense network trainable: True
[HCTR][12:32:45.602][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True
[HCTR][12:32:45.602][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True
[HCTR][12:32:45.602][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000
[HCTR][12:32:45.602][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000
[HCTR][12:32:45.602][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/file_list.txt
[HCTR][12:32:45.602][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/file_list.txt
[HCTR][12:32:45.607][DEBUG][RK0][tid #140647730231040]: file_name /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows 64
[HCTR][12:32:45.612][DEBUG][RK0][tid #140647730231040]: file_name_ /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:32:45.627][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][12:32:45.627][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][12:32:45.645][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:32:45.680][INFO][RK0][main]: Done
[HCTR][12:32:45.699][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:32:45.736][INFO][RK0][main]: Done
[HCTR][12:32:45.738][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][12:32:45.738][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][12:32:45.738][INFO][RK0][main]: Dumping dense optimizer states to file, successful
[HCTR][12:32:45.743][DEBUG][RK0][tid #140647730231040]: file_name_ /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:32:45.745][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s.
[HCTR][12:32:45.747][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully
[HCTR][12:32:45.748][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][12:32:45.748][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][12:32:45.766][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:32:45.800][INFO][RK0][main]: Done
[HCTR][12:32:45.819][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:32:45.856][INFO][RK0][main]: Done
[HCTR][12:32:45.858][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][12:32:45.858][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][12:32:45.858][INFO][RK0][main]: Dumping dense optimizer states to file, successful
----------------------------- Captured stderr call -----------------------------
I0705 12:32:46.157697 946 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f6614000000' with size 268435456
I0705 12:32:46.158435 946 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864
I0705 12:32:46.161665 946 model_repository_manager.cc:1191] loading: 0_predicthugectr:1
I0705 12:32:46.261999 946 model_repository_manager.cc:1191] loading: 0_hugectr:1
I0705 12:32:46.269000 946 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0)
I0705 12:32:49.001932 946 model_repository_manager.cc:1345] successfully loaded '0_predicthugectr' version 1
I0705 12:32:49.031773 946 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr
I0705 12:32:49.031802 946 hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9
I0705 12:32:49.031811 946 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.10
I0705 12:32:49.031822 946 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend
E0705 12:32:49.031832 946 model_repository_manager.cc:1348] failed to load '0_hugectr' version 1: Unsupported: Triton backend API version does not support this backend
I0705 12:32:49.032114 946 model_repository_manager.cc:1191] loading: ensemble_model:1
I0705 12:32:49.153145 946 model_repository_manager.cc:1345] successfully loaded 'ensemble_model' version 1
I0705 12:32:49.153270 946 server.cc:556]
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+
I0705 12:32:49.153356 946 server.cc:583]
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Backend | Path | Config |
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
| python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} |
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:32:49.153445 946 server.cc:626]
+------------------+---------+------------------------------------------------------------------------------------+
| Model | Version | Status |
+------------------+---------+------------------------------------------------------------------------------------+
| 0_hugectr | 1 | UNAVAILABLE: Unsupported: Triton backend API version does not support this backend |
| 0_predicthugectr | 1 | READY |
| ensemble_model | 1 | READY |
+------------------+---------+------------------------------------------------------------------------------------+
I0705 12:32:49.216576 946 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB
I0705 12:32:49.217411 946 tritonserver.cc:2138]
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Option | Value |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| server_id | triton |
| server_version | 2.22.0 |
| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace |
| model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-4/test_predict_hugectr0/model_repository |
| model_control_mode | MODE_NONE |
| strict_model_config | 1 |
| rate_limit | OFF |
| pinned_memory_pool_byte_size | 268435456 |
| cuda_memory_pool_byte_size{0} | 67108864 |
| response_cache_byte_size | 0 |
| min_supported_compute_capability | 6.0 |
| strict_readiness | 1 |
| exit_timeout | 30 |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:32:49.217449 946 server.cc:257] Waiting for in-flight requests to complete.
I0705 12:32:49.217458 946 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences
I0705 12:32:49.217464 946 model_repository_manager.cc:1223] unloading: ensemble_model:1
I0705 12:32:49.217502 946 model_repository_manager.cc:1223] unloading: 0_predicthugectr:1
I0705 12:32:49.217579 946 server.cc:288] All models are stopped, unloading models
I0705 12:32:49.217589 946 server.cc:295] Timeout 30: Found 2 live models and 0 in-flight non-inference requests
I0705 12:32:49.217725 946 model_repository_manager.cc:1328] successfully unloaded 'ensemble_model' version 1
I0705 12:32:50.217672 946 server.cc:295] Timeout 29: Found 1 live models and 0 in-flight non-inference requests
W0705 12:32:50.237878 946 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0705 12:32:50.237934 946 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
I0705 12:32:50.611706 946 model_repository_manager.cc:1328] successfully unloaded '0_predicthugectr' version 1
I0705 12:32:51.217802 946 server.cc:295] Timeout 28: Found 0 live models and 0 in-flight non-inference requests
error: creating server: Internal - failed to load all models
W0705 12:32:51.238086 946 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0705 12:32:51.238136 946 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl]
/usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute n_features_ was deprecated in version 1.0 and will be removed in 1.2. Use n_features_in_ instead.
warnings.warn(msg, category=FutureWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti...
FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run...
======= 2 failed, 47 passed, 1 skipped, 18 warnings in 164.00s (0:02:43) =======
Build step 'Execute shell' marked build as failure
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins18132275909526090334.sh
rerun tests
Click to view CI Results
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts.
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts.
Running as SYSTEM
Setting status of 088570474e008fa0580cb7ae6de1c4a2bceadf4e to PENDING with url https://10.20.13.93:8080/job/merlin_systems/123/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 088570474e008fa0580cb7ae6de1c4a2bceadf4e^{commit} # timeout=10
Checking out Revision 088570474e008fa0580cb7ae6de1c4a2bceadf4e (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10
Commit message: "added check for categorical columns in constructor"
> git rev-list --no-walk 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins12995046985373788952.sh
PYTHONPATH=/var/jenkins_home/workspace/merlin_systems/systems:/usr/local/hugectr/lib
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 50 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py .... [ 10%]
tests/unit/systems/test_ensemble_ops.py .. [ 14%]
tests/unit/systems/test_export.py . [ 16%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 30%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%]
tests/unit/systems/fil/test_fil.py .......................... [ 88%]
tests/unit/systems/fil/test_forest.py F.. [ 94%]
tests/unit/systems/hugectr/test_hugectr.py sF. [100%]
=================================== FAILURES ===================================
____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-5/test_load_from_config0')
def test_load_from_config(tmpdir):
rows = 200
num_features = 16
X, y = sklearn.datasets.make_regression(
n_samples=rows,
n_features=num_features,
n_informative=num_features // 3,
random_state=0,
)
model = xgboost.XGBRegressor()
model.fit(X, y)
feature_names = [str(i) for i in range(num_features)]
input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names])
output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)])
config = PredictForest(model, input_schema).export(
tmpdir, input_schema, output_schema, node_id=2
)
node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False}
}
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}}
E Differing items:
E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}}
E Full diff:
E {
E 'output__0': {'dtype': 'float32',
E 'is_list': False,
E - 'is_ragged': False},
E ? -
E + 'is_ragged': False,
E + 'tags': []},
E }
tests/unit/systems/fil/test_forest.py:57: AssertionError
_____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0')
def test_predict_hugectr(tmpdir):
cat_dtypes = {"a": int, "b": int, "c": int}
categorical_columns = ["a", "b", "c"]
gdf = make_df(
{
"a": np.arange(64, dtype=np.int64),
"b": np.arange(64, dtype=np.int64),
"c": np.arange(64, dtype=np.int64),
"d": np.random.rand(64).tolist(),
"label": [0] * 64,
},
)
gdf["label"] = gdf["label"].astype("float32")
gdf["d"] = gdf["d"].astype("float32")
train_dataset = nvt.Dataset(gdf)
dense_columns = ["d"]
dict_dtypes = {}
col_schemas = train_dataset.schema.column_schemas
for col in dense_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS)
dict_dtypes[col] = np.float32
for col in categorical_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL)
dict_dtypes[col] = np.int64
for col in ["label"]:
col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET)
dict_dtypes[col] = np.float32
train_path = os.path.join(tmpdir, "train/")
os.mkdir(train_path)
train_dataset.to_parquet(
output_path=train_path,
shuffle=nvt.io.Shuffle.PER_PARTITION,
cats=categorical_columns,
conts=dense_columns,
labels=["label"],
dtypes=dict_dtypes,
)
embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)}
total_cardinality = 0
slot_sizes = []
for column in cat_dtypes:
slot_sizes.append(embeddings[column][0])
total_cardinality += embeddings[column][0]
# slot sizes = list of cardinalities per column, total is sum of individual
model = _run_model(slot_sizes, train_path, len(dense_columns))
model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0])
model_repository_path = os.path.join(tmpdir, "model_repository")
input_schema = train_dataset.schema
triton_chain = input_schema.column_names >> model_op
ens = Ensemble(triton_chain, input_schema)
os.makedirs(model_repository_path)
enc_config, node_configs = ens.export(model_repository_path)
assert enc_config
assert len(node_configs) == 1
assert node_configs[0].name == "0_predicthugectr"
df = train_dataset.to_ddf().compute()[:5]
dense, cats, rowptr = _convert(df, slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path,
["OUTPUT0"],
df,
"ensemble_model",
backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json",
)
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver
with run_triton_server(tmpdir, backend_config=backend_config) as client:
/usr/lib/python3.8/contextlib.py:113: in enter
return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/model_repository'
backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager
def run_triton_server(modelpath, backend_config="tensorflow,version=2"):
"""This function starts up a Triton server instance and returns a client to it.
Parameters
----------
modelpath : string
The path to the model to load.
Yields
------
client: tritonclient.InferenceServerClient
The client connected to the Triton server.
"""
cmdline = [
TRITON_SERVER_PATH,
"--model-repository",
modelpath,
f"--backend-config={backend_config}",
]
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = "0"
with subprocess.Popen(cmdline, env=env) as process:
try:
with grpcclient.InferenceServerClient("localhost:8001") as client:
# wait until server is ready
for _ in range(60):
if process.poll() is not None:
retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError
----------------------------- Captured stdout call -----------------------------
HugeCTR Version: 3.7
====================================================Model Init=====================================================
[HCTR][12:40:35.769][WARNING][RK0][main]: The model name is not specified when creating the solver.
[HCTR][12:40:35.769][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled.
[HCTR][12:40:35.769][INFO][RK0][main]: Global seed is 4255386450
[HCTR][12:40:35.815][INFO][RK0][main]: Device to NUMA mapping:
GPU 0 -> node 0
[HCTR][12:40:36.376][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled.
[HCTR][12:40:36.376][INFO][RK0][main]: Start all2all warmup
[HCTR][12:40:36.376][INFO][RK0][main]: End all2all warmup
[HCTR][12:40:36.377][INFO][RK0][main]: Using All-reduce algorithm: NCCL
[HCTR][12:40:36.377][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB
[HCTR][12:40:36.377][INFO][RK0][main]: num of DataReader workers: 1
[HCTR][12:40:36.378][INFO][RK0][main]: Vocabulary size: 0
[HCTR][12:40:36.378][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362
[HCTR][12:40:36.378][DEBUG][RK0][tid #139836535068416]: file_name_ /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:40:36.378][DEBUG][RK0][tid #139836526675712]: file_name_ /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:40:36.379][INFO][RK0][main]: Graph analysis to resolve tensor dependency
===================================================Model Compile===================================================
[HCTR][12:40:36.675][INFO][RK0][main]: gpu0 start to init embedding
[HCTR][12:40:36.676][INFO][RK0][main]: gpu0 init embedding done
[HCTR][12:40:36.677][INFO][RK0][main]: Starting AUC NCCL warm-up
[HCTR][12:40:36.678][INFO][RK0][main]: Warm-up done
===================================================Model Summary===================================================
[HCTR][12:40:36.678][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Layer Type Input Name Output Name Output Shape
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)
InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1
BinaryCrossEntropyLoss fc2 loss
label
=====================================================Model Fit=====================================================
[HCTR][12:40:36.678][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20
[HCTR][12:40:36.678][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10
[HCTR][12:40:36.678][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10
[HCTR][12:40:36.678][INFO][RK0][main]: Dense network trainable: True
[HCTR][12:40:36.678][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True
[HCTR][12:40:36.678][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True
[HCTR][12:40:36.678][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000
[HCTR][12:40:36.678][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000
[HCTR][12:40:36.678][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/file_list.txt
[HCTR][12:40:36.678][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/file_list.txt
[HCTR][12:40:36.683][DEBUG][RK0][tid #139836535068416]: file_name /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows 64
[HCTR][12:40:36.688][DEBUG][RK0][tid #139836535068416]: file_name_ /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:40:36.692][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][12:40:36.692][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][12:40:36.717][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:40:36.752][INFO][RK0][main]: Done
[HCTR][12:40:36.771][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:40:36.808][INFO][RK0][main]: Done
[HCTR][12:40:36.810][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][12:40:36.810][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][12:40:36.810][INFO][RK0][main]: Dumping dense optimizer states to file, successful
[HCTR][12:40:36.815][DEBUG][RK0][tid #139836535068416]: file_name_ /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:40:36.818][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s.
[HCTR][12:40:36.819][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully
[HCTR][12:40:36.820][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][12:40:36.820][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][12:40:36.838][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:40:36.872][INFO][RK0][main]: Done
[HCTR][12:40:36.891][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:40:36.930][INFO][RK0][main]: Done
[HCTR][12:40:36.931][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][12:40:36.931][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][12:40:36.932][INFO][RK0][main]: Dumping dense optimizer states to file, successful
----------------------------- Captured stderr call -----------------------------
I0705 12:40:37.235594 2391 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7f9716000000' with size 268435456
I0705 12:40:37.236408 2391 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864
I0705 12:40:37.239737 2391 model_repository_manager.cc:1191] loading: 0_predicthugectr:1
I0705 12:40:37.340066 2391 model_repository_manager.cc:1191] loading: 0_hugectr:1
I0705 12:40:37.347009 2391 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0)
I0705 12:40:40.057794 2391 model_repository_manager.cc:1345] successfully loaded '0_predicthugectr' version 1
I0705 12:40:40.091206 2391 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr
I0705 12:40:40.091234 2391 hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9
I0705 12:40:40.091243 2391 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.10
I0705 12:40:40.091254 2391 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend
E0705 12:40:40.091264 2391 model_repository_manager.cc:1348] failed to load '0_hugectr' version 1: Unsupported: Triton backend API version does not support this backend
I0705 12:40:40.091491 2391 model_repository_manager.cc:1191] loading: ensemble_model:1
I0705 12:40:40.211529 2391 model_repository_manager.cc:1345] successfully loaded 'ensemble_model' version 1
I0705 12:40:40.211668 2391 server.cc:556]
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+
I0705 12:40:40.211777 2391 server.cc:583]
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Backend | Path | Config |
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
| python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} |
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:40:40.211871 2391 server.cc:626]
+------------------+---------+------------------------------------------------------------------------------------+
| Model | Version | Status |
+------------------+---------+------------------------------------------------------------------------------------+
| 0_hugectr | 1 | UNAVAILABLE: Unsupported: Triton backend API version does not support this backend |
| 0_predicthugectr | 1 | READY |
| ensemble_model | 1 | READY |
+------------------+---------+------------------------------------------------------------------------------------+
I0705 12:40:40.275562 2391 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB
I0705 12:40:40.276444 2391 tritonserver.cc:2138]
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Option | Value |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| server_id | triton |
| server_version | 2.22.0 |
| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace |
| model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-5/test_predict_hugectr0/model_repository |
| model_control_mode | MODE_NONE |
| strict_model_config | 1 |
| rate_limit | OFF |
| pinned_memory_pool_byte_size | 268435456 |
| cuda_memory_pool_byte_size{0} | 67108864 |
| response_cache_byte_size | 0 |
| min_supported_compute_capability | 6.0 |
| strict_readiness | 1 |
| exit_timeout | 30 |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:40:40.276483 2391 server.cc:257] Waiting for in-flight requests to complete.
I0705 12:40:40.276492 2391 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences
I0705 12:40:40.276498 2391 model_repository_manager.cc:1223] unloading: ensemble_model:1
I0705 12:40:40.276538 2391 model_repository_manager.cc:1223] unloading: 0_predicthugectr:1
I0705 12:40:40.276630 2391 server.cc:288] All models are stopped, unloading models
I0705 12:40:40.276639 2391 server.cc:295] Timeout 30: Found 2 live models and 0 in-flight non-inference requests
I0705 12:40:40.276783 2391 model_repository_manager.cc:1328] successfully unloaded 'ensemble_model' version 1
I0705 12:40:41.276723 2391 server.cc:295] Timeout 29: Found 1 live models and 0 in-flight non-inference requests
W0705 12:40:41.292990 2391 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0705 12:40:41.293040 2391 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
I0705 12:40:41.691664 2391 model_repository_manager.cc:1328] successfully unloaded '0_predicthugectr' version 1
I0705 12:40:42.276872 2391 server.cc:295] Timeout 28: Found 0 live models and 0 in-flight non-inference requests
error: creating server: Internal - failed to load all models
W0705 12:40:42.293197 2391 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0705 12:40:42.293250 2391 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl]
/usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute n_features_ was deprecated in version 1.0 and will be removed in 1.2. Use n_features_in_ instead.
warnings.warn(msg, category=FutureWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti...
FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run...
======= 2 failed, 47 passed, 1 skipped, 18 warnings in 174.24s (0:02:54) =======
Build step 'Execute shell' marked build as failure
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins8397006177805341591.sh
rerun tests
Click to view CI Results
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts.
GitHub pull request #125 of commit 088570474e008fa0580cb7ae6de1c4a2bceadf4e, no merge conflicts.
Running as SYSTEM
Setting status of 088570474e008fa0580cb7ae6de1c4a2bceadf4e to PENDING with url https://10.20.13.93:8080/job/merlin_systems/124/console and message: 'Pending'
Using context: Jenkins
Building on master in workspace /var/jenkins_home/workspace/merlin_systems
using credential fce1c729-5d7c-48e8-90cb-b0c314b1076e
> git rev-parse --is-inside-work-tree # timeout=10
Fetching changes from the remote Git repository
> git config remote.origin.url https://github.com/NVIDIA-Merlin/systems # timeout=10
Fetching upstream changes from https://github.com/NVIDIA-Merlin/systems
> git --version # timeout=10
using GIT_ASKPASS to set credentials login for merlin-systems user + githubtoken
> git fetch --tags --force --progress -- https://github.com/NVIDIA-Merlin/systems +refs/pull/125/*:refs/remotes/origin/pr/125/* # timeout=10
> git rev-parse 088570474e008fa0580cb7ae6de1c4a2bceadf4e^{commit} # timeout=10
Checking out Revision 088570474e008fa0580cb7ae6de1c4a2bceadf4e (detached)
> git config core.sparsecheckout # timeout=10
> git checkout -f 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10
Commit message: "added check for categorical columns in constructor"
> git rev-list --no-walk 088570474e008fa0580cb7ae6de1c4a2bceadf4e # timeout=10
[merlin_systems] $ /bin/bash /tmp/jenkins4914296926181841090.sh
PYTHONPATH=/usr/local/hugectr/lib:/var/jenkins_home/workspace/merlin_systems/systems
============================= test session starts ==============================
platform linux -- Python 3.8.10, pytest-7.1.2, pluggy-1.0.0
rootdir: /var/jenkins_home/workspace/merlin_systems/systems, configfile: pyproject.toml
plugins: anyio-3.5.0, xdist-2.5.0, forked-1.4.0, cov-3.0.0
collected 50 items
tests/unit/test_version.py . [ 2%]
tests/unit/systems/test_ensemble.py .... [ 10%]
tests/unit/systems/test_ensemble_ops.py .. [ 14%]
tests/unit/systems/test_export.py . [ 16%]
tests/unit/systems/test_graph.py . [ 18%]
tests/unit/systems/test_inference_ops.py .. [ 22%]
tests/unit/systems/test_op_runner.py .... [ 30%]
tests/unit/systems/test_tensorflow_inf_op.py ... [ 36%]
tests/unit/systems/fil/test_fil.py .......................... [ 88%]
tests/unit/systems/fil/test_forest.py F.. [ 94%]
tests/unit/systems/hugectr/test_hugectr.py sF. [100%]
=================================== FAILURES ===================================
____________________________ test_load_from_config _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-6/test_load_from_config0')
def test_load_from_config(tmpdir):
rows = 200
num_features = 16
X, y = sklearn.datasets.make_regression(
n_samples=rows,
n_features=num_features,
n_informative=num_features // 3,
random_state=0,
)
model = xgboost.XGBRegressor()
model.fit(X, y)
feature_names = [str(i) for i in range(num_features)]
input_schema = Schema([ColumnSchema(col, dtype=np.float32) for col in feature_names])
output_schema = Schema([ColumnSchema("output__0", dtype=np.float32)])
config = PredictForest(model, input_schema).export(
tmpdir, input_schema, output_schema, node_id=2
)
node_config = json.loads(config.parameters[config.name].string_value)
assert json.loads(node_config["output_dict"]) == {
"output__0": {"dtype": "float32", "is_list": False, "is_ragged": False}
}
E AssertionError: assert {'output__0':..., 'tags': []}} == {'output__0':...gged': False}}
E Differing items:
E {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False, 'tags': []}} != {'output__0': {'dtype': 'float32', 'is_list': False, 'is_ragged': False}}
E Full diff:
E {
E 'output__0': {'dtype': 'float32',
E 'is_list': False,
E - 'is_ragged': False},
E ? -
E + 'is_ragged': False,
E + 'tags': []},
E }
tests/unit/systems/fil/test_forest.py:57: AssertionError
_____________________________ test_predict_hugectr _____________________________
tmpdir = local('/tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0')
def test_predict_hugectr(tmpdir):
cat_dtypes = {"a": int, "b": int, "c": int}
categorical_columns = ["a", "b", "c"]
gdf = make_df(
{
"a": np.arange(64, dtype=np.int64),
"b": np.arange(64, dtype=np.int64),
"c": np.arange(64, dtype=np.int64),
"d": np.random.rand(64).tolist(),
"label": [0] * 64,
},
)
gdf["label"] = gdf["label"].astype("float32")
gdf["d"] = gdf["d"].astype("float32")
train_dataset = nvt.Dataset(gdf)
dense_columns = ["d"]
dict_dtypes = {}
col_schemas = train_dataset.schema.column_schemas
for col in dense_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CONTINUOUS)
dict_dtypes[col] = np.float32
for col in categorical_columns:
col_schemas[col] = col_schemas[col].with_tags(Tags.CATEGORICAL)
dict_dtypes[col] = np.int64
for col in ["label"]:
col_schemas[col] = col_schemas[col].with_tags(Tags.TARGET)
dict_dtypes[col] = np.float32
train_path = os.path.join(tmpdir, "train/")
os.mkdir(train_path)
train_dataset.to_parquet(
output_path=train_path,
shuffle=nvt.io.Shuffle.PER_PARTITION,
cats=categorical_columns,
conts=dense_columns,
labels=["label"],
dtypes=dict_dtypes,
)
embeddings = {"a": (64, 16), "b": (64, 16), "c": (64, 16)}
total_cardinality = 0
slot_sizes = []
for column in cat_dtypes:
slot_sizes.append(embeddings[column][0])
total_cardinality += embeddings[column][0]
# slot sizes = list of cardinalities per column, total is sum of individual
model = _run_model(slot_sizes, train_path, len(dense_columns))
model_op = PredictHugeCTR(model, train_dataset.schema, max_nnz=2, device_list=[0])
model_repository_path = os.path.join(tmpdir, "model_repository")
input_schema = train_dataset.schema
triton_chain = input_schema.column_names >> model_op
ens = Ensemble(triton_chain, input_schema)
os.makedirs(model_repository_path)
enc_config, node_configs = ens.export(model_repository_path)
assert enc_config
assert len(node_configs) == 1
assert node_configs[0].name == "0_predicthugectr"
df = train_dataset.to_ddf().compute()[:5]
dense, cats, rowptr = _convert(df, slot_sizes, categorical_columns, labels=["label"])
response = _run_ensemble_on_tritonserver(
model_repository_path,
["OUTPUT0"],
df,
"ensemble_model",
backend_config=f"hugectr,ps={tmpdir}/model_repository/ps.json",
)
tests/unit/systems/hugectr/test_hugectr.py:314:
tests/unit/systems/utils/triton.py:39: in _run_ensemble_on_tritonserver
with run_triton_server(tmpdir, backend_config=backend_config) as client:
/usr/lib/python3.8/contextlib.py:113: in enter
return next(self.gen)
modelpath = '/tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/model_repository'
backend_config = 'hugectr,ps=/tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/model_repository/ps.json'
@contextlib.contextmanager
def run_triton_server(modelpath, backend_config="tensorflow,version=2"):
"""This function starts up a Triton server instance and returns a client to it.
Parameters
----------
modelpath : string
The path to the model to load.
Yields
------
client: tritonclient.InferenceServerClient
The client connected to the Triton server.
"""
cmdline = [
TRITON_SERVER_PATH,
"--model-repository",
modelpath,
f"--backend-config={backend_config}",
]
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = "0"
with subprocess.Popen(cmdline, env=env) as process:
try:
with grpcclient.InferenceServerClient("localhost:8001") as client:
# wait until server is ready
for _ in range(60):
if process.poll() is not None:
retcode = process.returncode
raise RuntimeError(f"Tritonserver failed to start (ret={retcode})")
E RuntimeError: Tritonserver failed to start (ret=1)
merlin/systems/triton/utils.py:46: RuntimeError
----------------------------- Captured stdout call -----------------------------
HugeCTR Version: 3.7
====================================================Model Init=====================================================
[HCTR][12:43:58.864][WARNING][RK0][main]: The model name is not specified when creating the solver.
[HCTR][12:43:58.864][WARNING][RK0][main]: MPI was already initialized somewhere elese. Lifetime service disabled.
[HCTR][12:43:58.864][INFO][RK0][main]: Global seed is 832437304
[HCTR][12:43:58.903][INFO][RK0][main]: Device to NUMA mapping:
GPU 0 -> node 0
[HCTR][12:43:59.471][WARNING][RK0][main]: Peer-to-peer access cannot be fully enabled.
[HCTR][12:43:59.471][INFO][RK0][main]: Start all2all warmup
[HCTR][12:43:59.471][INFO][RK0][main]: End all2all warmup
[HCTR][12:43:59.472][INFO][RK0][main]: Using All-reduce algorithm: NCCL
[HCTR][12:43:59.472][INFO][RK0][main]: Device 0: Tesla P100-DGXS-16GB
[HCTR][12:43:59.472][INFO][RK0][main]: num of DataReader workers: 1
[HCTR][12:43:59.473][INFO][RK0][main]: Vocabulary size: 0
[HCTR][12:43:59.473][INFO][RK0][main]: max_vocabulary_size_per_gpu_=584362
[HCTR][12:43:59.473][DEBUG][RK0][tid #140066899773184]: file_name_ /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:43:59.473][DEBUG][RK0][tid #140066891380480]: file_name_ /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:43:59.475][INFO][RK0][main]: Graph analysis to resolve tensor dependency
===================================================Model Compile===================================================
[HCTR][12:43:59.771][INFO][RK0][main]: gpu0 start to init embedding
[HCTR][12:43:59.772][INFO][RK0][main]: gpu0 init embedding done
[HCTR][12:43:59.773][INFO][RK0][main]: Starting AUC NCCL warm-up
[HCTR][12:43:59.773][INFO][RK0][main]: Warm-up done
===================================================Model Summary===================================================
[HCTR][12:43:59.773][INFO][RK0][main]: label Dense Sparse
label dense data1
(None, 1) (None, 1)
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
Layer Type Input Name Output Name Output Shape
——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
DistributedSlotSparseEmbeddingHash data1 sparse_embedding1 (None, 3, 16)
InnerProduct dense fc1 (None, 512)
Reshape sparse_embedding1 reshape1 (None, 48)
InnerProduct reshape1 fc2 (None, 1)
fc1
BinaryCrossEntropyLoss fc2 loss
label
=====================================================Model Fit=====================================================
[HCTR][12:43:59.774][INFO][RK0][main]: Use non-epoch mode with number of iterations: 20
[HCTR][12:43:59.774][INFO][RK0][main]: Training batchsize: 10, evaluation batchsize: 10
[HCTR][12:43:59.774][INFO][RK0][main]: Evaluation interval: 200, snapshot interval: 10
[HCTR][12:43:59.774][INFO][RK0][main]: Dense network trainable: True
[HCTR][12:43:59.774][INFO][RK0][main]: Sparse embedding sparse_embedding1 trainable: True
[HCTR][12:43:59.774][INFO][RK0][main]: Use mixed precision: False, scaler: 1.000000, use cuda graph: True
[HCTR][12:43:59.774][INFO][RK0][main]: lr: 0.001000, warmup_steps: 1, end_lr: 0.000000
[HCTR][12:43:59.774][INFO][RK0][main]: decay_start: 0, decay_steps: 1, decay_power: 2.000000
[HCTR][12:43:59.774][INFO][RK0][main]: Training source file: /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/file_list.txt
[HCTR][12:43:59.774][INFO][RK0][main]: Evaluation source file: /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/file_list.txt
[HCTR][12:43:59.779][DEBUG][RK0][tid #140066899773184]: file_name /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows 64
[HCTR][12:43:59.783][DEBUG][RK0][tid #140066899773184]: file_name_ /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:43:59.787][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][12:43:59.788][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][12:43:59.813][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:43:59.848][INFO][RK0][main]: Done
[HCTR][12:43:59.867][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:43:59.904][INFO][RK0][main]: Done
[HCTR][12:43:59.906][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][12:43:59.906][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][12:43:59.906][INFO][RK0][main]: Dumping dense optimizer states to file, successful
[HCTR][12:43:59.911][DEBUG][RK0][tid #140066899773184]: file_name_ /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/train/part_0.parquet file_total_rows_ 64
[HCTR][12:43:59.913][INFO][RK0][main]: Finish 20 iterations with batchsize: 10 in 0.14s.
[HCTR][12:43:59.915][INFO][RK0][main]: Save the model graph to /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/model_repository/0_hugectr/1/0_hugectr.json successfully
[HCTR][12:43:59.916][INFO][RK0][main]: Rank0: Write hash table to file
[HCTR][12:43:59.916][INFO][RK0][main]: Dumping sparse weights to files, successful
[HCTR][12:43:59.933][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:43:59.969][INFO][RK0][main]: Done
[HCTR][12:43:59.987][INFO][RK0][main]: Rank0: Write optimzer state to file
[HCTR][12:44:00.024][INFO][RK0][main]: Done
[HCTR][12:44:00.026][INFO][RK0][main]: Dumping sparse optimzer states to files, successful
[HCTR][12:44:00.026][INFO][RK0][main]: Dumping dense weights to file, successful
[HCTR][12:44:00.026][INFO][RK0][main]: Dumping dense optimizer states to file, successful
----------------------------- Captured stderr call -----------------------------
I0705 12:44:00.316030 3734 pinned_memory_manager.cc:240] Pinned memory pool is created at '0x7fdd24000000' with size 268435456
I0705 12:44:00.316781 3734 cuda_memory_manager.cc:105] CUDA memory pool is created on device 0 with size 67108864
I0705 12:44:00.319964 3734 model_repository_manager.cc:1191] loading: 0_predicthugectr:1
I0705 12:44:00.420272 3734 model_repository_manager.cc:1191] loading: 0_hugectr:1
I0705 12:44:00.424791 3734 python.cc:2388] TRITONBACKEND_ModelInstanceInitialize: 0_predicthugectr (GPU device 0)
I0705 12:44:03.110730 3734 model_repository_manager.cc:1345] successfully loaded '0_predicthugectr' version 1
I0705 12:44:03.144641 3734 hugectr.cc:1738] TRITONBACKEND_Initialize: hugectr
I0705 12:44:03.144673 3734 hugectr.cc:1745] Triton TRITONBACKEND API version: 1.9
I0705 12:44:03.144685 3734 hugectr.cc:1749] 'hugectr' TRITONBACKEND API version: 1.10
I0705 12:44:03.144699 3734 hugectr.cc:1827] TRITONBACKEND_Backend Finalize: HugectrBackend
E0705 12:44:03.144716 3734 model_repository_manager.cc:1348] failed to load '0_hugectr' version 1: Unsupported: Triton backend API version does not support this backend
I0705 12:44:03.145020 3734 model_repository_manager.cc:1191] loading: ensemble_model:1
I0705 12:44:03.272356 3734 model_repository_manager.cc:1345] successfully loaded 'ensemble_model' version 1
I0705 12:44:03.272526 3734 server.cc:556]
+------------------+------+
| Repository Agent | Path |
+------------------+------+
+------------------+------+
I0705 12:44:03.272627 3734 server.cc:583]
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Backend | Path | Config |
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
| python | /opt/tritonserver/backends/python/libtriton_python.so | {"cmdline":{"auto-complete-config":"false","min-compute-capability":"6.000000","backend-directory":"/opt/tritonserver/backends","default-max-batch-size":"4"}} |
+---------+-------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:44:03.272750 3734 server.cc:626]
+------------------+---------+------------------------------------------------------------------------------------+
| Model | Version | Status |
+------------------+---------+------------------------------------------------------------------------------------+
| 0_hugectr | 1 | UNAVAILABLE: Unsupported: Triton backend API version does not support this backend |
| 0_predicthugectr | 1 | READY |
| ensemble_model | 1 | READY |
+------------------+---------+------------------------------------------------------------------------------------+
I0705 12:44:03.338094 3734 metrics.cc:650] Collecting metrics for GPU 0: Tesla P100-DGXS-16GB
I0705 12:44:03.338985 3734 tritonserver.cc:2138]
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Option | Value |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| server_id | triton |
| server_version | 2.22.0 |
| server_extensions | classification sequence model_repository model_repository(unload_dependents) schedule_policy model_configuration system_shared_memory cuda_shared_memory binary_tensor_data statistics trace |
| model_repository_path[0] | /tmp/pytest-of-jenkins/pytest-6/test_predict_hugectr0/model_repository |
| model_control_mode | MODE_NONE |
| strict_model_config | 1 |
| rate_limit | OFF |
| pinned_memory_pool_byte_size | 268435456 |
| cuda_memory_pool_byte_size{0} | 67108864 |
| response_cache_byte_size | 0 |
| min_supported_compute_capability | 6.0 |
| strict_readiness | 1 |
| exit_timeout | 30 |
+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
I0705 12:44:03.339022 3734 server.cc:257] Waiting for in-flight requests to complete.
I0705 12:44:03.339032 3734 server.cc:273] Timeout 30: Found 0 model versions that have in-flight inferences
I0705 12:44:03.339041 3734 model_repository_manager.cc:1223] unloading: ensemble_model:1
I0705 12:44:03.339080 3734 model_repository_manager.cc:1223] unloading: 0_predicthugectr:1
I0705 12:44:03.339154 3734 server.cc:288] All models are stopped, unloading models
I0705 12:44:03.339164 3734 server.cc:295] Timeout 30: Found 2 live models and 0 in-flight non-inference requests
I0705 12:44:03.339264 3734 model_repository_manager.cc:1328] successfully unloaded 'ensemble_model' version 1
I0705 12:44:04.339254 3734 server.cc:295] Timeout 29: Found 1 live models and 0 in-flight non-inference requests
W0705 12:44:04.356687 3734 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0705 12:44:04.356746 3734 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
I0705 12:44:04.765274 3734 model_repository_manager.cc:1328] successfully unloaded '0_predicthugectr' version 1
I0705 12:44:05.339401 3734 server.cc:295] Timeout 28: Found 0 live models and 0 in-flight non-inference requests
error: creating server: Internal - failed to load all models
W0705 12:44:05.356913 3734 metrics.cc:468] Unable to get energy consumption for GPU 0. Status:Success, value:0
W0705 12:44:05.356961 3734 metrics.cc:507] Unable to get memory usage for GPU 0. Memory usage status:Success, value:0. Memory total status:Success, value:0
=============================== warnings summary ===============================
../../../.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18
/var/jenkins_home/.local/lib/python3.8/site-packages/nvtabular/framework_utils/init.py:18: DeprecationWarning: The nvtabular.framework_utils module is being replaced by the Merlin Models library. Support for importing from nvtabular.framework_utils is deprecated, and will be removed in a future version. Please consider using the models and layers from Merlin Models instead.
warnings.warn(
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_config_verification[parquet]
tests/unit/systems/test_ensemble.py::test_workflow_tf_e2e_multi_op_run[parquet]
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_validates_schemas[parquet]
tests/unit/systems/test_inference_ops.py::test_workflow_op_exports_own_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_config[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_loads_multiple_ops_same_execute[parquet]
tests/unit/systems/test_op_runner.py::test_op_runner_single_node_export[parquet]
/usr/local/lib/python3.8/dist-packages/cudf/core/frame.py:384: UserWarning: The deep parameter is ignored and is only included for pandas compatibility.
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column x is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column y is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/test_export.py::test_export_run_ensemble_triton[tensorflow-parquet]
/var/jenkins_home/workspace/merlin_systems/systems/merlin/systems/triton/export.py:304: UserWarning: Column id is being generated by NVTabular workflow but is unused in test_name_tf model
warnings.warn(
tests/unit/systems/fil/test_fil.py::test_binary_classifier_default[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_binary_classifier_with_proba[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_multi_classifier[sklearn_forest_classifier-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_regressor[sklearn_forest_regressor-get_model_params4]
tests/unit/systems/fil/test_fil.py::test_model_file[sklearn_forest_regressor-checkpoint.tl]
/usr/local/lib/python3.8/dist-packages/sklearn/utils/deprecation.py:103: FutureWarning: Attribute n_features_ was deprecated in version 1.0 and will be removed in 1.2. Use n_features_in_ instead.
warnings.warn(msg, category=FutureWarning)
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
=========================== short test summary info ============================
FAILED tests/unit/systems/fil/test_forest.py::test_load_from_config - Asserti...
FAILED tests/unit/systems/hugectr/test_hugectr.py::test_predict_hugectr - Run...
======= 2 failed, 47 passed, 1 skipped, 18 warnings in 172.43s (0:02:52) =======
Build step 'Execute shell' marked build as failure
Performing Post build task...
Match found for : : True
Logical operation result is TRUE
Running script : #!/bin/bash
cd /var/jenkins_home/
CUDA_VISIBLE_DEVICES=1 python test_res_push.py "https://api.GitHub.com/repos/NVIDIA-Merlin/systems/issues/$ghprbPullId/comments" "/var/jenkins_home/jobs/$JOB_NAME/builds/$BUILD_NUMBER/log"
[merlin_systems] $ /bin/bash /tmp/jenkins1025052810390896047.sh