sleap
sleap copied to clipboard
Bottom-up does not work for single-node Skeletons: `ValueError: Shape must be at least rank 2 but is rank 1`
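We should change the bottom-up pipeline to handle cases where the Skeleton has only a single node. Currently, bottom-up relies on PAFs to group nodes into instances, but a single-node skeleton has no edges (`edges=[]`), so training already fails while generating the PAFs: the edge index tensor is empty (shape `[0]`, rank 1), and `tf.gather(edge_inds, 0, axis=1)` in `get_edge_points` raises the `ValueError` shown in the traceback below.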
Discussed in https://github.com/talmolab/sleap/discussions/1246
Originally posted by Jaspermoray on March 28, 2023:
Hi! I started trying to run SLEAP on Google Colab and ran into a bug that I can't figure out how to fix. Can you help?
Here is my code:
from google.colab import drive
drive.mount('/content/drive/')
import os
os.chdir("/content/drive/My Drive/sleap")
!unzip Colab.slp.training_job.zip
!ls
!sleap-train multi_instance.json Colab.pkg.slp
And here is the error:
Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
Archive: Colab.slp.training_job.zip
replace multi_instance.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
inflating: multi_instance.json
replace train-script.sh? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
inflating: train-script.sh
replace inference-script.sh? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
inflating: inference-script.sh
replace jobs.yaml? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
inflating: jobs.yaml
replace Colab.pkg.slp? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
inflating: Colab.pkg.slp
33instances.v002.pkg.slp inference-script.sh
33instances.v002.slp.training_job.zip jobs.yaml
Colab_demo.bottomup multi_instance.json
Colab.pkg.slp train-script.sh
Colab.slp.training_job.zip
INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:sleap.nn.training:Versions:
SLEAP: 1.3.0
TensorFlow: 2.8.4
Numpy: 1.22.4
Python: 3.9.16
OS: Linux-5.10.147+-x86_64-with-glibc2.31
INFO:sleap.nn.training:Training labels file: Colab.pkg.slp
INFO:sleap.nn.training:Training profile: multi_instance.json
INFO:sleap.nn.training:
INFO:sleap.nn.training:Arguments:
INFO:sleap.nn.training:{
"training_job_path": "multi_instance.json",
"labels_path": "Colab.pkg.slp",
"video_paths": [
""
],
"val_labels": null,
"test_labels": null,
"base_checkpoint": null,
"tensorboard": false,
"save_viz": false,
"zmq": false,
"run_name": "",
"prefix": "",
"suffix": "",
"cpu": false,
"first_gpu": false,
"last_gpu": false,
"gpu": "auto"
}
INFO:sleap.nn.training:
INFO:sleap.nn.training:Training job:
INFO:sleap.nn.training:{
"data": {
"labels": {
"training_labels": null,
"validation_labels": null,
"validation_fraction": 0.1,
"test_labels": null,
"split_by_inds": false,
"training_inds": null,
"validation_inds": null,
"test_inds": null,
"search_path_hints": [],
"skeletons": []
},
"preprocessing": {
"ensure_rgb": false,
"ensure_grayscale": false,
"imagenet_mode": null,
"input_scaling": 1.0,
"pad_to_stride": null,
"resize_and_pad_to_target": true,
"target_height": null,
"target_width": null
},
"instance_cropping": {
"center_on_part": null,
"crop_size": null,
"crop_size_detection_padding": 16
}
},
"model": {
"backbone": {
"leap": null,
"unet": {
"stem_stride": null,
"max_stride": 32,
"output_stride": 4,
"filters": 16,
"filters_rate": 2.0,
"middle_block": true,
"up_interpolate": true,
"stacks": 1
},
"hourglass": null,
"resnet": null,
"pretrained_encoder": null
},
"heads": {
"single_instance": null,
"centroid": null,
"centered_instance": null,
"multi_instance": {
"confmaps": {
"part_names": null,
"sigma": 2.5,
"output_stride": 4,
"loss_weight": 1.0,
"offset_refinement": false
},
"pafs": {
"edges": null,
"sigma": 75.0,
"output_stride": 8,
"loss_weight": 1.0
}
},
"multi_class_bottomup": null,
"multi_class_topdown": null
},
"base_checkpoint": null
},
"optimization": {
"preload_data": true,
"augmentation_config": {
"rotate": true,
"rotation_min_angle": -15.0,
"rotation_max_angle": 15.0,
"translate": false,
"translate_min": -5,
"translate_max": 5,
"scale": false,
"scale_min": 0.9,
"scale_max": 1.1,
"uniform_noise": false,
"uniform_noise_min_val": 0.0,
"uniform_noise_max_val": 10.0,
"gaussian_noise": false,
"gaussian_noise_mean": 5.0,
"gaussian_noise_stddev": 1.0,
"contrast": false,
"contrast_min_gamma": 0.5,
"contrast_max_gamma": 2.0,
"brightness": false,
"brightness_min_val": 0.0,
"brightness_max_val": 10.0,
"random_crop": false,
"random_crop_height": 256,
"random_crop_width": 256,
"random_flip": true,
"flip_horizontal": false
},
"online_shuffling": true,
"shuffle_buffer_size": 128,
"prefetch": true,
"batch_size": 4,
"batches_per_epoch": null,
"min_batches_per_epoch": 200,
"val_batches_per_epoch": null,
"min_val_batches_per_epoch": 10,
"epochs": 200,
"optimizer": "adam",
"initial_learning_rate": 0.0001,
"learning_rate_schedule": {
"reduce_on_plateau": true,
"reduction_factor": 0.5,
"plateau_min_delta": 1e-06,
"plateau_patience": 5,
"plateau_cooldown": 3,
"min_learning_rate": 1e-08
},
"hard_keypoint_mining": {
"online_mining": false,
"hard_to_easy_ratio": 2.0,
"min_hard_keypoints": 2,
"max_hard_keypoints": null,
"loss_scale": 5.0
},
"early_stopping": {
"stop_training_on_plateau": true,
"plateau_min_delta": 1e-08,
"plateau_patience": 10
}
},
"outputs": {
"save_outputs": true,
"run_name": "230308_155737",
"run_name_prefix": "",
"run_name_suffix": ".multi_instance",
"runs_folder": "/Volumes/JasperCray/sleap/models",
"tags": [
""
],
"save_visualizations": true,
"delete_viz_images": true,
"zip_outputs": false,
"log_to_csv": true,
"checkpointing": {
"initial_model": false,
"best_model": true,
"every_epoch": false,
"latest_model": false,
"final_model": false
},
"tensorboard": {
"write_logs": false,
"loss_frequency": "epoch",
"architecture_graph": false,
"profile_graph": false,
"visualizations": true
},
"zmq": {
"subscribe_to_controller": false,
"controller_address": "tcp://127.0.0.1:9000",
"controller_polling_timeout": 10,
"publish_updates": false,
"publish_address": "tcp://127.0.0.1:9001"
}
},
"name": "",
"description": "",
"sleap_version": "1.3.0a0",
"filename": "multi_instance.json"
}
INFO:sleap.nn.training:
2023-03-28 19:33:05.007420: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
INFO:sleap.nn.training:Running in CPU-only mode.
INFO:sleap.nn.training:System:
GPUs: None detected.
INFO:sleap.nn.training:
INFO:sleap.nn.training:Initializing trainer...
INFO:sleap.nn.training:Loading training labels from: Colab.pkg.slp
INFO:sleap.nn.training:Creating training and validation splits from validation fraction: 0.1
INFO:sleap.nn.training: Splits: Training = 18 / Validation = 2.
INFO:sleap.nn.training:Setting up for training...
INFO:sleap.nn.training:Setting up pipeline builders...
INFO:sleap.nn.training:Setting up model...
INFO:sleap.nn.training:Building test pipeline...
INFO:sleap.nn.training:Loaded test example. [2.858s]
INFO:sleap.nn.training: Input shape: (1088, 1920, 3)
INFO:sleap.nn.training:Created Keras model.
INFO:sleap.nn.training: Backbone: UNet(stacks=1, filters=16, filters_rate=2.0, kernel_size=3, stem_kernel_size=7, convs_per_block=2, stem_blocks=0, down_blocks=5, middle_block=True, up_blocks=3, up_interpolate=True, block_contraction=False)
INFO:sleap.nn.training: Max stride: 32
INFO:sleap.nn.training: Parameters: 7,816,273
INFO:sleap.nn.training: Heads:
INFO:sleap.nn.training: [0] = MultiInstanceConfmapsHead(part_names=['Thorax'], sigma=2.5, output_stride=4, loss_weight=1.0)
INFO:sleap.nn.training: [1] = PartAffinityFieldsHead(edges=[], sigma=75.0, output_stride=8, loss_weight=1.0)
INFO:sleap.nn.training: Outputs:
INFO:sleap.nn.training: [0] = KerasTensor(type_spec=TensorSpec(shape=(None, 272, 480, 1), dtype=tf.float32, name=None), name='MultiInstanceConfmapsHead/BiasAdd:0', description="created by layer 'MultiInstanceConfmapsHead'")
INFO:sleap.nn.training: [1] = KerasTensor(type_spec=TensorSpec(shape=(None, 136, 240, 0), dtype=tf.float32, name=None), name='PartAffinityFieldsHead/BiasAdd:0', description="created by layer 'PartAffinityFieldsHead'")
INFO:sleap.nn.training:Training from scratch
INFO:sleap.nn.training:Setting up data pipelines...
INFO:sleap.nn.training:Training set: n = 18
INFO:sleap.nn.training:Validation set: n = 2
INFO:sleap.nn.training:Setting up optimization...
INFO:sleap.nn.training: Learning rate schedule: LearningRateScheduleConfig(reduce_on_plateau=True, reduction_factor=0.5, plateau_min_delta=1e-06, plateau_patience=5, plateau_cooldown=3, min_learning_rate=1e-08)
INFO:sleap.nn.training: Early stopping: EarlyStoppingConfig(stop_training_on_plateau=True, plateau_min_delta=1e-08, plateau_patience=10)
INFO:sleap.nn.training:Setting up outputs...
INFO:sleap.nn.training:Created run path: /Volumes/JasperCray/sleap/models/230308_155737.multi_instance
INFO:sleap.nn.training:Setting up visualization...
2023-03-28 19:33:15.780280: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: -33 } dim { size: -34 } dim { size: -35 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "111" frequency: 2199 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 262144 l3_cache_size: 57671680 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: -36 } dim { size: -37 } dim { size: 1 } } }
2023-03-28 19:33:18.650216: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: -33 } dim { size: -34 } dim { size: -35 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "111" frequency: 2199 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 262144 l3_cache_size: 57671680 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: -36 } dim { size: -37 } dim { size: 1 } } }
Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
INFO:sleap.nn.training:Finished trainer set up. [12.1s]
INFO:sleap.nn.training:Creating tf.data.Datasets for training data generation...
Traceback (most recent call last):
File "/usr/local/bin/sleap-train", line 8, in <module>
sys.exit(main())
File "/usr/local/lib/python3.9/dist-packages/sleap/nn/training.py", line 2014, in main
trainer.train()
File "/usr/local/lib/python3.9/dist-packages/sleap/nn/training.py", line 930, in train
training_ds = self.training_pipeline.make_dataset()
File "/usr/local/lib/python3.9/dist-packages/sleap/nn/data/pipelines.py", line 287, in make_dataset
ds = transformer.transform_dataset(ds)
File "/usr/local/lib/python3.9/dist-packages/sleap/nn/data/edge_maps.py", line 356, in transform_dataset
output_ds = input_ds.map(
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 2018, in map
return ParallelMapDataset(
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 5234, in __init__
self._map_func = structured_function.StructuredFunctionWrapper(
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/structured_function.py", line 271, in __init__
self._function = fn_factory()
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3070, in get_concrete_function
graph_function = self._get_concrete_function_garbage_collected(
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3036, in _get_concrete_function_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3292, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3130, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/func_graph.py", line 1161, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/structured_function.py", line 248, in wrapped_fn
ret = wrapper_helper(*args)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/structured_function.py", line 177, in wrapper_helper
ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/autograph/impl/api.py", line 692, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
File "/usr/local/lib/python3.9/dist-packages/sleap/nn/data/edge_maps.py", line 334, in generate_pafs *
edge_sources, edge_destinations = get_edge_points(instances, edge_inds)
File "/usr/local/lib/python3.9/dist-packages/sleap/nn/data/edge_maps.py", line 233, in get_edge_points *
source_inds = tf.cast(tf.gather(edge_inds, 0, axis=1), tf.int32)
ValueError: Shape must be at least rank 2 but is rank 1 for '{{node GatherV2}} = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_INT32, batch_dims=0](GatherV2/params, GatherV2/indices, GatherV2/axis)' with input shapes: [0], [], [] and with computed input tensors: input[2] = <1>.