BehaviorTree.ROS2 icon indicating copy to clipboard operation
BehaviorTree.ROS2 copied to clipboard

Error when canceling action during halt()

Open MatteoAlbi opened this issue 2 years ago • 10 comments

I'm running in a weird behavior when trying to halting an action node.

Setup: BehaviorTree.ROS2 branch humble bb0d510 BehaviorTree.CPP branch master acbc707

Structure: ws |- BehaviorTree.ROS2 |- BehaviorTree.CPP |- test |- CMakeLists.txt |- package.xml |- src |- bt_act_test.cpp

CMakeLists.txt:

cmake_minimum_required(VERSION 3.8)
project(bt_act_test)

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    add_compile_options(-Wall -Wextra -Wpedantic)
endif()

# find dependencies
find_package(ament_cmake REQUIRED)
find_package(rclcpp REQUIRED )
find_package(rclcpp_action REQUIRED )
find_package(behaviortree_cpp REQUIRED )
find_package(behaviortree_ros2 REQUIRED )
find_package(example_interfaces REQUIRED)

add_executable(bt_act_test src/bt_act_test.cpp)
target_include_directories(bt_act_test PUBLIC
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:include>
)
ament_target_dependencies(bt_act_test
    rclcpp
    rclcpp_action
    behaviortree_cpp
    behaviortree_ros2
    example_interfaces
)
    

if(BUILD_TESTING)
    find_package(ament_lint_auto REQUIRED)
    # the following line skips the linter which checks for copyrights
    # comment the line when a copyright and license is added to all source files
    set(ament_cmake_copyright_FOUND TRUE)
    # the following line skips cpplint (only works in a git repo)
    # comment the line when this package is in a git repo and when
    # a copyright and license is added to all source files
    set(ament_cmake_cpplint_FOUND TRUE)
    ament_lint_auto_find_test_dependencies()
endif()

install(TARGETS 
    bt_act_test
DESTINATION lib/${PROJECT_NAME})    

ament_package()

package.xml:

...
    <buildtool_depend>ament_cmake</buildtool_depend>

    <depend>rclcpp</depend>
    <depend>rclcpp_action</depend>
    <depend>behaviortree_ros2</depend>
    <depend>behaviortree_cpp</depend>
    <depend>example_interfaces</depend>

    <test_depend>ament_lint_auto</test_depend>
    <test_depend>ament_lint_common</test_depend>

    <export>
      <build_type>ament_cmake</build_type>
    </export>
...

bt_act_test.cpp:

#include <stdio.h>
#include <string>
#include <chrono>
#include <thread>
#include <memory>
#include <filesystem>
#include <vector>

#include <example_interfaces/action/fibonacci.hpp>
#include <rclcpp/rclcpp.hpp>
#include <rclcpp_action/rclcpp_action.hpp>

#include <behaviortree_ros2/bt_action_node.hpp>

using vec_int = std::vector<int32_t>;
using Fibonacci = example_interfaces::action::Fibonacci;
using GoalHandleFibonacci = rclcpp_action::ServerGoalHandle<Fibonacci>;



void sleep(int ms=10){
    std::this_thread::sleep_for(std::chrono::milliseconds(ms));
}


class FibonacciActionServer : public rclcpp::Node {  

public:

    explicit FibonacciActionServer(const rclcpp::NodeOptions & options = rclcpp::NodeOptions())
    : Node("server_node", options)
    {
        using namespace std::placeholders;

        this->action_server_ = rclcpp_action::create_server<Fibonacci>(
            this,
            "fibonacci",
            std::bind(&FibonacciActionServer::handle_goal, this, _1, _2),
            std::bind(&FibonacciActionServer::handle_cancel, this, _1),
            std::bind(&FibonacciActionServer::handle_accepted, this, _1)    
        );
    }

private:
    rclcpp_action::Server<Fibonacci>::SharedPtr action_server_;

    rclcpp_action::GoalResponse handle_goal(
        const rclcpp_action::GoalUUID & uuid,
        std::shared_ptr<const Fibonacci::Goal> goal)
    {
        RCLCPP_INFO(this->get_logger(), "Received goal request with order %d", goal->order);
        (void)uuid;
        return rclcpp_action::GoalResponse::ACCEPT_AND_EXECUTE;
    }

    rclcpp_action::CancelResponse handle_cancel(
        const std::shared_ptr<GoalHandleFibonacci> goal_handle)
    {
        RCLCPP_INFO(this->get_logger(), "Received request to cancel goal");
        (void)goal_handle;
        return rclcpp_action::CancelResponse::ACCEPT;
    }

    void handle_accepted(const std::shared_ptr<GoalHandleFibonacci> goal_handle)
    {
        using namespace std::placeholders;
        // this needs to return quickly to avoid blocking the executor, so spin up a new thread
        std::thread{std::bind(&FibonacciActionServer::execute, this, _1), goal_handle}.detach();
    }

    void execute(const std::shared_ptr<GoalHandleFibonacci> goal_handle)
    {
        RCLCPP_INFO(this->get_logger(), "Executing goal");
        rclcpp::Rate loop_rate(1);
        const auto goal = goal_handle->get_goal();
        auto feedback = std::make_shared<Fibonacci::Feedback>();
        auto & sequence = feedback->sequence;
        sequence.push_back(0);
        sequence.push_back(1);
        auto result = std::make_shared<Fibonacci::Result>();

        for (int i = 1; (i < goal->order) && rclcpp::ok(); ++i) {
            // Check if there is a cancel request
            if (goal_handle->is_canceling()) {
                result->sequence = sequence;
                goal_handle->canceled(result);
                RCLCPP_INFO(this->get_logger(), "Goal canceled");
                return;
            }
            // Update sequence
            sequence.push_back(sequence[i] + sequence[i - 1]);
            // Publish feedback
            goal_handle->publish_feedback(feedback);
            RCLCPP_INFO(this->get_logger(), "Publish feedback");

            loop_rate.sleep();
        }

        // Check if goal is done
        if (rclcpp::ok()) {
            result->sequence = sequence;
            goal_handle->succeed(result);
            RCLCPP_INFO(this->get_logger(), "Goal succeeded");
        }
    }
};

using namespace BT;

class FibonacciAction: public RosActionNode<Fibonacci>
{
public:
  FibonacciAction(const std::string& name,
                  const NodeConfig& conf,
                  const RosNodeParams& params)
    : RosActionNode<Fibonacci>(name, conf, params)
  {}

  // The specific ports of this Derived class
  // should be merged with the ports of the base class,
  // using RosActionNode::providedBasicPorts()
  static PortsList providedPorts()
  {
    return providedBasicPorts({InputPort<unsigned>("order"), OutputPort<vec_int>("result")});
  }

  // This is called when the TreeNode is ticked and it should
  // send the request to the action server
  bool setGoal(RosActionNode::Goal& goal) override 
  {
    // get "order" from the Input port
    getInput("order", goal.order);
    // return true, if we were able to set the goal correctly.
    return true;
  }
  
  // Callback executed when the reply is received.
  // Based on the reply you may decide to return SUCCESS or FAILURE.
  NodeStatus onResultReceived(const WrappedResult& wr) override
  {
    std::stringstream ss;
    ss << "Result received: ";
    for (auto number : wr.result->sequence) {
      ss << number << " ";
    }
    RCLCPP_INFO(node_->get_logger(), ss.str().c_str());
    this->setOutput<vec_int>("result", wr.result->sequence);
    return NodeStatus::SUCCESS;
  }

  // Callback invoked when there was an error at the level
  // of the communication between client and server.
  // This will set the status of the TreeNode to either SUCCESS or FAILURE,
  // based on the return value.
  // If not overridden, it will return FAILURE by default.
  virtual NodeStatus onFailure(ActionNodeErrorCode error) override
  {
    RCLCPP_ERROR(node_->get_logger(), "Error: %d", error);
    return NodeStatus::FAILURE;
  }

  // we also support a callback for the feedback, as in
  // the original tutorial.
  // Usually, this callback should return RUNNING, but you
  // might decide, based on the value of the feedback, to abort
  // the action, and consider the TreeNode completed.
  // In that case, return SUCCESS or FAILURE.
  // The Cancel request will be send automatically to the server.
  NodeStatus onFeedback(const std::shared_ptr<const Feedback> feedback)
  {
    std::stringstream ss;
    ss << "Next number in sequence received: ";
    for (auto number : feedback->sequence) {
      ss << number << " ";
    }
    RCLCPP_INFO(node_->get_logger(), ss.str().c_str());
    this->setOutput<vec_int>("result", feedback->sequence);
    return NodeStatus::RUNNING;
  }
};

template<typename T>
class GetBBvalueNode : public BT::SyncActionNode{
public:
    GetBBvalueNode (
        const std::string& name, const BT::NodeConfig& config, 
        std::function<BT::NodeStatus(T)> callback,
        std::function<BT::NodeStatus(BT::Expected<T>)> no_value_callback = [](BT::Expected<T> input){
            throw BT::RuntimeError("missing required input [input]: ", input.error() );
            return BT::NodeStatus::FAILURE; //needed to pass contructor checks
        }
    ) : 
    BT::SyncActionNode(name, config),
    _callback(callback),
    _no_value_callback(no_value_callback) {}

    BT::NodeStatus tick() override{
        auto input = getInput<T>("input"); //get value from port "input"

        // Check if expected is valid. If not, throw its error
        if (!input) return this->_no_value_callback(input);
        else return this->_callback(input.value());
    }

    /**
     * @brief set port input as <T> input
    */
    static BT::PortsList providedPorts(){
        return{ BT::InputPort<T>("input", "read value")};
    }

private:
    std::function<BT::NodeStatus(T)> _callback;
    std::function<BT::NodeStatus(BT::Expected<T>)> _no_value_callback;
};

int main(int argc, char *argv[]){
    rclcpp::init(argc, argv);

    static const char* xml_text = R"(
    <root BTCPP_format="4" >
        <BehaviorTree ID="MainTree">
            <ReactiveSequence>
                <GetVec input="{res}"/>
                <Fibonacci  order="5"
                            result="{res}"/>
            </ReactiveSequence>
        </BehaviorTree>
    </root>
    )";
    
    std::vector<vec_int> res_vecs;

    auto get_vec = [&res_vecs](vec_int input){
        if ( std::find(res_vecs.begin(), res_vecs.end(), input) == res_vecs.end() ){ 
            res_vecs.push_back(input);
            for(auto el : res_vecs.back()) std::cout << el << "; ";
            std::cout << std::endl;
        } 
        if (res_vecs.size() == 2) return BT::NodeStatus::FAILURE;
        else return BT::NodeStatus::SUCCESS;
    };
    auto on_err = [](BT::Expected<vec_int> input){
        (void)input; 
        return BT::NodeStatus::SUCCESS;
    };


    BehaviorTreeFactory factory;
    // provide the ROS node and the name of the action service
    RosNodeParams params; 
    auto node = std::make_shared<rclcpp::Node>("fibonacci_action_client");
    params.nh = node;
    params.default_port_value = "fibonacci";
    factory.registerNodeType<FibonacciAction>("Fibonacci", params);
    factory.registerNodeType<GetBBvalueNode<vec_int>>("GetVec", get_vec, on_err);

    
    auto server_node = std::make_shared<FibonacciActionServer>();
    auto spin_callback = [](rclcpp::Node::SharedPtr node){rclcpp::spin(node);};
    std::thread{spin_callback, server_node}.detach();
    sleep();

    // tick the tree
    auto tree = factory.createTreeFromText(xml_text);
    tree.tickWhileRunning();

    rclcpp::shutdown();

    return 0;
}

The code is essentially copied-pasted from the tutorials BT ros2 integration and ROS2 action server.

When I try to run the executable I get the following:

root@f1e379732b0f:~/projects# ros2 run bt_act_test bt_act_test 
[INFO] [1687792324.400457200] [server_node]: Received goal request with order 5
[INFO] [1687792324.401037500] [server_node]: Executing goal
[INFO] [1687792324.401250200] [server_node]: Publish feedback
[INFO] [1687792324.421203700] [fibonacci_action_client]: Goal accepted by server, waiting for result
[INFO] [1687792325.401689100] [server_node]: Publish feedback
[INFO] [1687792325.407532400] [fibonacci_action_client]: Next number in sequence received: 0 1 1 2 
0; 1; 1; 2; 
[INFO] [1687792326.401572200] [server_node]: Publish feedback
[INFO] [1687792326.405681700] [fibonacci_action_client]: Next number in sequence received: 0 1 1 2 3 
0; 1; 1; 2; 3; 
[INFO] [1687792326.406163200] [server_node]: Received request to cancel goal
terminate called after throwing an instance of 'std::runtime_error'
  what():  Asked to publish result for goal that does not exist
[ros2run]: Aborted

Apologies for the long message, but I hope it will make the error reproduction easier. Thanks in advance for the support

MatteoAlbi avatar Jun 26 '23 15:06 MatteoAlbi

I am having more or less the same issue. Our structure is suprisingly similar. However i am getting a different error message

[INFO] [1687943587.313014132] [action_test_client_1]: Goal handle: goal is 2
terminate called after throwing an instance of 'rclcpp_action::exceptions::UnknownGoalHandleError'
  what():  Goal handle is not known to this client

i do not get the Received request to cancel goal even though i have the same setup.

I noticed the error does not happen with a regular Sequence instead of a Reactive Sequence, however is not my intended behaviour.

this is my tree:

<?xml version="1.0" encoding="UTF-8"?>
<root BTCPP_format="4">
  <BehaviorTree ID="MainLogic">
    <ReactiveSequence>
      <Fallback>
        <GET_ROV_STATUS/>
        <ACTION_TEST_CLIENT_2 goal="1"/>
      </Fallback>
      <ACTION_TEST_CLIENT_1 goal="2"/>
    </ReactiveSequence>
  </BehaviorTree>

  <!-- Description of Node Models (used by Groot) -->
  <TreeNodesModel>
    <Action ID="ACTION_TEST_CLIENT_1"
            editable="true">
      <input_port name="goal"/>
    </Action>
    <Action ID="ACTION_TEST_CLIENT_2"
            editable="true">
      <input_port name="goal"/>
    </Action>
    <Action ID="GET_ROV_STATUS"
            editable="true"/>
  </TreeNodesModel>

</root>

adding my gdb backtrace in hopes of this helping:

#0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737339827200)
    at ./nptl/pthread_kill.c:44
#1  __pthread_kill_internal (signo=6, threadid=140737339827200) at ./nptl/pthread_kill.c:78
#2  __GI___pthread_kill (threadid=140737339827200, signo=signo@entry=6)
    at ./nptl/pthread_kill.c:89
#3  0x00007ffff74f3476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4  0x00007ffff74d97f3 in __GI_abort () at ./stdlib/abort.c:79
#5  0x00007ffff779dbbe in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
#6  0x00007ffff77a924c in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
#7  0x00007ffff77a92b7 in std::terminate() () from /lib/x86_64-linux-gnu/libstdc++.so.6
#8  0x00007ffff77a9518 in __cxa_throw () from /lib/x86_64-linux-gnu/libstdc++.so.6
#9  0x0000555555598600 in rclcpp_action::Client<example_interfaces::action::Fibonacci>::async_cancel_goal(std::shared_ptr<rclcpp_action::ClientGoalHandle<example_interfaces::action::Fibonacci> >, std::function<void (std::shared_ptr<action_msgs::srv::CancelGoal_Response_<std::allocator<void> > >)>) (this=0x555555ba6460, 
    goal_handle=std::shared_ptr<rclcpp_action::ClientGoalHandle<example_interfaces::action::Fibonacci>> (empty) = {...}, cancel_callback=...)
    at /opt/ros/humble/include/rclcpp_action/rclcpp_action/client.hpp:536
#10 0x00005555555951a0 in BT::RosActionNode<example_interfaces::action::Fibonacci>::cancelGoal (this=0x555555b85c20)
    at /---/---/ros2ws/install/behaviortree_ros2/include/behaviortree_ros2/behaviortree_ros2/bt_action_node.hpp:408
#11 0x0000555555591a44 in BT::RosActionNode<example_interfaces::action::Fibonacci>::halt (
    this=0x555555b85c20)
--Type <RET> for more, q to quit, c to continue without paging--
    at /---/---/ros2ws/install/behaviortree_ros2/include/behaviortree_ros2/behaviortree_ros2/bt_action_node.hpp:401
#12 0x00007ffff7c9f2c3 in BT::TreeNode::haltNode() ()
   from /usr/local/lib/libbehaviortree_cpp.so
#13 0x00007ffff7c9d7a8 in BT::ControlNode::haltChild(unsigned long) ()
   from /usr/local/lib/libbehaviortree_cpp.so
#14 0x00007ffff7ce0ecb in BT::ReactiveSequence::tick() ()
   from /usr/local/lib/libbehaviortree_cpp.so
#15 0x00007ffff7ca099d in BT::TreeNode::executeTick() ()
   from /usr/local/lib/libbehaviortree_cpp.so
#16 0x00007ffff7c6cc0e in BT::Tree::tickRoot(BT::Tree::TickOption, std::chrono::duration<long, std::ratio<1l, 1000l> >) () from /usr/local/lib/libbehaviortree_cpp.so
#17 0x0000555555566f21 in main (argc=1, argv=0x7fffffffd888)
    at /---/---/ros2ws/src/shared_playground/src/behavior_tree.cpp:40

i hope we can find out what's causing this. To me it seems like the action stems from the action server goal cancel itself, but i do not understand why it works with a regular Sequence then.

Stewil avatar Jun 28 '23 09:06 Stewil

I refactored the code. I will try your example soon

facontidavide avatar Aug 24 '23 11:08 facontidavide

Hi, I somehow landed into the same runtime error

I am not really sure why one of our Custom BT Nodes based on the bt_action_node is falling under this behavior. As far as I have seen, there is nothing out of the ordinary with it.

Anyways, I opted to handle this behavior by catching the exception that is thrown by the action_client_->async_get_result and action_client_->async_cancel_goal as done in https://github.com/ErickKramer/BehaviorTree.ROS2/blob/c125b15e5dee9b697bcdf7c1c68213f159b9a9f0/behaviortree_ros2/include/behaviortree_ros2/bt_action_node.hpp#L439

I'd be glad to open PR if you find such a solution good enough.

ErickKramer avatar Jan 23 '24 14:01 ErickKramer

Hi, I somehow landed into the same runtime error

I am not really sure why one of our Custom BT Nodes based on the bt_action_node is falling under this behavior. As far as I have seen, there is nothing out of the ordinary with it.

Anyways, I opted to handle this behavior by catching the exception that is thrown by the action_client_->async_get_result and action_client_->async_cancel_goal as done in https://github.com/ErickKramer/BehaviorTree.ROS2/blob/c125b15e5dee9b697bcdf7c1c68213f159b9a9f0/behaviortree_ros2/include/behaviortree_ros2/bt_action_node.hpp#L439

I'd be glad to open PR if you find such a solution good enough.

Further insight. This seems to happen when setGoal of the Action Node hasn't finished by the time the halt is triggered.

ErickKramer avatar Jan 23 '24 14:01 ErickKramer

I'm getting the exact same error as @ErickKramer.

marj3220 avatar Feb 16 '24 19:02 marj3220

ok, I will investigate

facontidavide avatar Feb 16 '24 22:02 facontidavide

I ran into the same issue as well. It appears to be a race condition between server-side goal acceptance / rejection and client side cancellation.

I "solved" it by adding the following to the top of RosActionNode<T>::cancelGoal():

template<class T> inline
  void RosActionNode<T>::cancelGoal()
{
  // block until we get the goal handle (need it to cancel)
  if( !goal_received_ )
  {
    // process callbacks until future returns (could be indefinite...)
    callback_group_executor_.spin_until_future_complete(future_goal_handle_);

    // update variables
    goal_received_ = true;
    goal_handle_ = future_goal_handle_.get();
    future_goal_handle_ = {};

    // if no goal handle the goal was rejected; easy exit
    if( !goal_handle_ )
    {
      return;
    }
  }
  // ... rest of function unchanged

Unfortunately this won't work in the general case since it introduces a potentially indefinite blocking call into cancelGoal... not sure what the right solution looks like.

danielmohansahu avatar Feb 20 '24 22:02 danielmohansahu

I faced similar issues in case of inherited RosActionNode. [in commit : bdb778ac98583c96587fdc0bc22f8c24e1bb1a54]

In my case, most issues occurs due to not catching up GoalHandler in RosActionNode. It appears that there is a need for a term to be invoked or scheduled in an executor.

In RosActionNode, implemented like this :

      auto nodelay = std::chrono::milliseconds(0);
      auto timeout = rclcpp::Duration::from_seconds( double(server_timeout_.count()) / 1000);
      auto ret = callback_group_executor_.spin_until_future_complete(future_goal_handle_, nodelay);

That executor tries to find out goal_handler asynchronously, but spin_until_future_complete seems to check out only one execution at once, while other executions are also present in it. So it appears to need a term to be invoked.

In my case, replacing it with callback_group_executor_.spin_until_future_complete(future_goal_handle_, std::chrono::milliseconds(30)) works well.

leeminju531 avatar Apr 22 '24 15:04 leeminju531