sleap icon indicating copy to clipboard operation
sleap copied to clipboard

Bottom-up does not work for single-node Skeletons: `ValueError: Shape must be at least rank 2 but is rank 1`

Status: Open — roomrys opened this issue 2 years ago • 0 comments

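We should change the bottom-up pipeline to handle cases where the Skeleton has only a single node. Currently, bottom-up fails for such skeletons because it relies on part affinity fields (PAFs) to group nodes into instances, and a single-node Skeleton has no edges. In the log below, the PAF head is created with `edges=[]` and a 0-channel output, and training crashes in `get_edge_points` while generating PAFs: the edge index tensor has shape `[0]` (rank 1), but `tf.gather(edge_inds, 0, axis=1)` requires at least rank 2.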

Discussed in https://github.com/talmolab/sleap/discussions/1246

Originally posted by Jaspermoray on March 28, 2023:

Hi!

I started trying to run SLEAP on Google Colab and ran into a bug that I can't figure out how to fix. Can you help?

Here is my code:

  from google.colab import drive
  drive.mount('/content/drive/')
  import os
  os.chdir("/content/drive/My Drive/sleap")
  !unzip Colab.slp.training_job.zip
  !ls
  !sleap-train multi_instance.json Colab.pkg.slp

And here is the error:

  Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
  Archive:  Colab.slp.training_job.zip
  replace multi_instance.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
    inflating: multi_instance.json     
  replace train-script.sh? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
    inflating: train-script.sh         
  replace inference-script.sh? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
    inflating: inference-script.sh     
  replace jobs.yaml? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
    inflating: jobs.yaml               
  replace Colab.pkg.slp? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
    inflating: Colab.pkg.slp           
  33instances.v002.pkg.slp	       inference-script.sh
  33instances.v002.slp.training_job.zip  jobs.yaml
  Colab_demo.bottomup		       multi_instance.json
  Colab.pkg.slp			       train-script.sh
  Colab.slp.training_job.zip
  INFO:numexpr.utils:NumExpr defaulting to 2 threads.
  INFO:sleap.nn.training:Versions:
  SLEAP: 1.3.0
  TensorFlow: 2.8.4
  Numpy: 1.22.4
  Python: 3.9.16
  OS: Linux-5.10.147+-x86_64-with-glibc2.31
  INFO:sleap.nn.training:Training labels file: Colab.pkg.slp
  INFO:sleap.nn.training:Training profile: multi_instance.json
  INFO:sleap.nn.training:
  INFO:sleap.nn.training:Arguments:
  INFO:sleap.nn.training:{
      "training_job_path": "multi_instance.json",
      "labels_path": "Colab.pkg.slp",
      "video_paths": [
          ""
      ],
      "val_labels": null,
      "test_labels": null,
      "base_checkpoint": null,
      "tensorboard": false,
      "save_viz": false,
      "zmq": false,
      "run_name": "",
      "prefix": "",
      "suffix": "",
      "cpu": false,
      "first_gpu": false,
      "last_gpu": false,
      "gpu": "auto"
  }
  INFO:sleap.nn.training:
  INFO:sleap.nn.training:Training job:
  INFO:sleap.nn.training:{
      "data": {
          "labels": {
              "training_labels": null,
              "validation_labels": null,
              "validation_fraction": 0.1,
              "test_labels": null,
              "split_by_inds": false,
              "training_inds": null,
              "validation_inds": null,
              "test_inds": null,
              "search_path_hints": [],
              "skeletons": []
          },
          "preprocessing": {
              "ensure_rgb": false,
              "ensure_grayscale": false,
              "imagenet_mode": null,
              "input_scaling": 1.0,
              "pad_to_stride": null,
              "resize_and_pad_to_target": true,
              "target_height": null,
              "target_width": null
          },
          "instance_cropping": {
              "center_on_part": null,
              "crop_size": null,
              "crop_size_detection_padding": 16
          }
      },
      "model": {
          "backbone": {
              "leap": null,
              "unet": {
                  "stem_stride": null,
                  "max_stride": 32,
                  "output_stride": 4,
                  "filters": 16,
                  "filters_rate": 2.0,
                  "middle_block": true,
                  "up_interpolate": true,
                  "stacks": 1
              },
              "hourglass": null,
              "resnet": null,
              "pretrained_encoder": null
          },
          "heads": {
              "single_instance": null,
              "centroid": null,
              "centered_instance": null,
              "multi_instance": {
                  "confmaps": {
                      "part_names": null,
                      "sigma": 2.5,
                      "output_stride": 4,
                      "loss_weight": 1.0,
                      "offset_refinement": false
                  },
                  "pafs": {
                      "edges": null,
                      "sigma": 75.0,
                      "output_stride": 8,
                      "loss_weight": 1.0
                  }
              },
              "multi_class_bottomup": null,
              "multi_class_topdown": null
          },
          "base_checkpoint": null
      },
      "optimization": {
          "preload_data": true,
          "augmentation_config": {
              "rotate": true,
              "rotation_min_angle": -15.0,
              "rotation_max_angle": 15.0,
              "translate": false,
              "translate_min": -5,
              "translate_max": 5,
              "scale": false,
              "scale_min": 0.9,
              "scale_max": 1.1,
              "uniform_noise": false,
              "uniform_noise_min_val": 0.0,
              "uniform_noise_max_val": 10.0,
              "gaussian_noise": false,
              "gaussian_noise_mean": 5.0,
              "gaussian_noise_stddev": 1.0,
              "contrast": false,
              "contrast_min_gamma": 0.5,
              "contrast_max_gamma": 2.0,
              "brightness": false,
              "brightness_min_val": 0.0,
              "brightness_max_val": 10.0,
              "random_crop": false,
              "random_crop_height": 256,
              "random_crop_width": 256,
              "random_flip": true,
              "flip_horizontal": false
          },
          "online_shuffling": true,
          "shuffle_buffer_size": 128,
          "prefetch": true,
          "batch_size": 4,
          "batches_per_epoch": null,
          "min_batches_per_epoch": 200,
          "val_batches_per_epoch": null,
          "min_val_batches_per_epoch": 10,
          "epochs": 200,
          "optimizer": "adam",
          "initial_learning_rate": 0.0001,
          "learning_rate_schedule": {
              "reduce_on_plateau": true,
              "reduction_factor": 0.5,
              "plateau_min_delta": 1e-06,
              "plateau_patience": 5,
              "plateau_cooldown": 3,
              "min_learning_rate": 1e-08
          },
          "hard_keypoint_mining": {
              "online_mining": false,
              "hard_to_easy_ratio": 2.0,
              "min_hard_keypoints": 2,
              "max_hard_keypoints": null,
              "loss_scale": 5.0
          },
          "early_stopping": {
              "stop_training_on_plateau": true,
              "plateau_min_delta": 1e-08,
              "plateau_patience": 10
          }
      },
      "outputs": {
          "save_outputs": true,
          "run_name": "230308_155737",
          "run_name_prefix": "",
          "run_name_suffix": ".multi_instance",
          "runs_folder": "/Volumes/JasperCray/sleap/models",
          "tags": [
              ""
          ],
          "save_visualizations": true,
          "delete_viz_images": true,
          "zip_outputs": false,
          "log_to_csv": true,
          "checkpointing": {
              "initial_model": false,
              "best_model": true,
              "every_epoch": false,
              "latest_model": false,
              "final_model": false
          },
          "tensorboard": {
              "write_logs": false,
              "loss_frequency": "epoch",
              "architecture_graph": false,
              "profile_graph": false,
              "visualizations": true
          },
          "zmq": {
              "subscribe_to_controller": false,
              "controller_address": "tcp://127.0.0.1:9000",
              "controller_polling_timeout": 10,
              "publish_updates": false,
              "publish_address": "tcp://127.0.0.1:9001"
          }
      },
      "name": "",
      "description": "",
      "sleap_version": "1.3.0a0",
      "filename": "multi_instance.json"
  }
  INFO:sleap.nn.training:
  2023-03-28 19:33:05.007420: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
  INFO:sleap.nn.training:Running in CPU-only mode.
  INFO:sleap.nn.training:System:
  GPUs: None detected.
  INFO:sleap.nn.training:
  INFO:sleap.nn.training:Initializing trainer...
  INFO:sleap.nn.training:Loading training labels from: Colab.pkg.slp
  INFO:sleap.nn.training:Creating training and validation splits from validation fraction: 0.1
  INFO:sleap.nn.training:  Splits: Training = 18 / Validation = 2.
  INFO:sleap.nn.training:Setting up for training...
  INFO:sleap.nn.training:Setting up pipeline builders...
  INFO:sleap.nn.training:Setting up model...
  INFO:sleap.nn.training:Building test pipeline...
  INFO:sleap.nn.training:Loaded test example. [2.858s]
  INFO:sleap.nn.training:  Input shape: (1088, 1920, 3)
  INFO:sleap.nn.training:Created Keras model.
  INFO:sleap.nn.training:  Backbone: UNet(stacks=1, filters=16, filters_rate=2.0, kernel_size=3, stem_kernel_size=7, convs_per_block=2, stem_blocks=0, down_blocks=5, middle_block=True, up_blocks=3, up_interpolate=True, block_contraction=False)
  INFO:sleap.nn.training:  Max stride: 32
  INFO:sleap.nn.training:  Parameters: 7,816,273
  INFO:sleap.nn.training:  Heads: 
  INFO:sleap.nn.training:    [0] = MultiInstanceConfmapsHead(part_names=['Thorax'], sigma=2.5, output_stride=4, loss_weight=1.0)
  INFO:sleap.nn.training:    [1] = PartAffinityFieldsHead(edges=[], sigma=75.0, output_stride=8, loss_weight=1.0)
  INFO:sleap.nn.training:  Outputs: 
  INFO:sleap.nn.training:    [0] = KerasTensor(type_spec=TensorSpec(shape=(None, 272, 480, 1), dtype=tf.float32, name=None), name='MultiInstanceConfmapsHead/BiasAdd:0', description="created by layer 'MultiInstanceConfmapsHead'")
  INFO:sleap.nn.training:    [1] = KerasTensor(type_spec=TensorSpec(shape=(None, 136, 240, 0), dtype=tf.float32, name=None), name='PartAffinityFieldsHead/BiasAdd:0', description="created by layer 'PartAffinityFieldsHead'")
  INFO:sleap.nn.training:Training from scratch
  INFO:sleap.nn.training:Setting up data pipelines...
  INFO:sleap.nn.training:Training set: n = 18
  INFO:sleap.nn.training:Validation set: n = 2
  INFO:sleap.nn.training:Setting up optimization...
  INFO:sleap.nn.training:  Learning rate schedule: LearningRateScheduleConfig(reduce_on_plateau=True, reduction_factor=0.5, plateau_min_delta=1e-06, plateau_patience=5, plateau_cooldown=3, min_learning_rate=1e-08)
  INFO:sleap.nn.training:  Early stopping: EarlyStoppingConfig(stop_training_on_plateau=True, plateau_min_delta=1e-08, plateau_patience=10)
  INFO:sleap.nn.training:Setting up outputs...
  INFO:sleap.nn.training:Created run path: /Volumes/JasperCray/sleap/models/230308_155737.multi_instance
  INFO:sleap.nn.training:Setting up visualization...
  2023-03-28 19:33:15.780280: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: -33 } dim { size: -34 } dim { size: -35 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "111" frequency: 2199 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 262144 l3_cache_size: 57671680 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: -36 } dim { size: -37 } dim { size: 1 } } }
  2023-03-28 19:33:18.650216: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "CropAndResize" attr { key: "T" value { type: DT_FLOAT } } attr { key: "extrapolation_value" value { f: 0 } } attr { key: "method" value { s: "bilinear" } } inputs { dtype: DT_FLOAT shape { dim { size: -33 } dim { size: -34 } dim { size: -35 } dim { size: 1 } } } inputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: 4 } } } inputs { dtype: DT_INT32 shape { dim { size: -2 } } } inputs { dtype: DT_INT32 shape { dim { size: 2 } } } device { type: "CPU" vendor: "GenuineIntel" model: "111" frequency: 2199 num_cores: 2 environment { key: "cpu_instruction_set" value: "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 32768 l2_cache_size: 262144 l3_cache_size: 57671680 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { dim { size: -2 } dim { size: -36 } dim { size: -37 } dim { size: 1 } } }
  Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
  Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
  Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
  Unable to use Qt backend for matplotlib. This probably means Qt is running headless.
  INFO:sleap.nn.training:Finished trainer set up. [12.1s]
  INFO:sleap.nn.training:Creating tf.data.Datasets for training data generation...
  Traceback (most recent call last):
    File "/usr/local/bin/sleap-train", line 8, in <module>
      sys.exit(main())
    File "/usr/local/lib/python3.9/dist-packages/sleap/nn/training.py", line 2014, in main
      trainer.train()
    File "/usr/local/lib/python3.9/dist-packages/sleap/nn/training.py", line 930, in train
      training_ds = self.training_pipeline.make_dataset()
    File "/usr/local/lib/python3.9/dist-packages/sleap/nn/data/pipelines.py", line 287, in make_dataset
      ds = transformer.transform_dataset(ds)
    File "/usr/local/lib/python3.9/dist-packages/sleap/nn/data/edge_maps.py", line 356, in transform_dataset
      output_ds = input_ds.map(
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 2018, in map
      return ParallelMapDataset(
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 5234, in __init__
      self._map_func = structured_function.StructuredFunctionWrapper(
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/structured_function.py", line 271, in __init__
      self._function = fn_factory()
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3070, in get_concrete_function
      graph_function = self._get_concrete_function_garbage_collected(
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3036, in _get_concrete_function_garbage_collected
      graph_function, _ = self._maybe_define_function(args, kwargs)
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3292, in _maybe_define_function
      graph_function = self._create_graph_function(args, kwargs)
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/function.py", line 3130, in _create_graph_function
      func_graph_module.func_graph_from_py_func(
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/framework/func_graph.py", line 1161, in func_graph_from_py_func
      func_outputs = python_func(*func_args, **func_kwargs)
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/structured_function.py", line 248, in wrapped_fn
      ret = wrapper_helper(*args)
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/data/ops/structured_function.py", line 177, in wrapper_helper
      ret = autograph.tf_convert(self._func, ag_ctx)(*nested_args)
    File "/usr/local/lib/python3.9/dist-packages/tensorflow/python/autograph/impl/api.py", line 692, in wrapper
      raise e.ag_error_metadata.to_exception(e)
  ValueError: in user code:
  
      File "/usr/local/lib/python3.9/dist-packages/sleap/nn/data/edge_maps.py", line 334, in generate_pafs  *
          edge_sources, edge_destinations = get_edge_points(instances, edge_inds)
      File "/usr/local/lib/python3.9/dist-packages/sleap/nn/data/edge_maps.py", line 233, in get_edge_points  *
          source_inds = tf.cast(tf.gather(edge_inds, 0, axis=1), tf.int32)
  
      ValueError: Shape must be at least rank 2 but is rank 1 for '{{node GatherV2}} = GatherV2[Taxis=DT_INT32, Tindices=DT_INT32, Tparams=DT_INT32, batch_dims=0](GatherV2/params, GatherV2/indices, GatherV2/axis)' with input shapes: [0], [], [] and with computed input tensors: input[2] = <1>.

roomrys avatar Mar 28 '23 21:03 roomrys