pybind11 [BUG]: Memory issues when building in release mode

[BUG]: Memory issues when building in release mode

Open alexander-soare opened this issue 3 years ago • 0 comments

Required prerequisites

[X] Make sure you've read the documentation. Your issue may be addressed there.
[X] Search the issue tracker and Discussions to verify that this hasn't already been reported. +1 or comment there if it has.
[X] Consider asking first in the Gitter chat room or in a Discussion.

Problem description

I have found that when I build my project in release mode, one of my bound arrays has garbage values in it. I have been working for hours to get a reproducible, minimal example but it's very hard to do as even adding/removing comments in the source code changes the outcome and sometimes "fixes" the problem. It almost looks as if some memory blocks are being shifted around when I make changes. Rebuilding without changes reproduces the same results.

This does not happen in debug mode.

Reproducible example code

As I mentioned above, changing my code, or even removing comments changes the outcome. I can't whittle this code down further without the error disappearing.

My directory has

CMakeLists.txt (and there is a build folder to go with this)
test.cc (the main source file)
test.py (Python code to test)
moby_ravelin.h (I feel like this is irrelevant but I couldn't succesfully remove it without losing the error)
moby_ravelin.cc (I feel like this is irrelevant but I couldn't succesfully remove it without losing the error)

To build and run in Release mode first make a fresh build directory then:

cd build; cmake -DCMAKE_BUILD_TYPE=Release ..; make -j; cd ..; python test.py

The output from the python file looks like

[1, 1, 1]
points_mean: [1, -nan, 1.1]
points[0] normalized: [0, -nan, -0.1]
points[0] in Optimize: [0, -nan, -0.1]
[1, 1, 1]
points_mean: [1, 1, 1.1]
points[0] normalized: [0, 0, -0.1]
points[0] in Optimize: [0, 0, -0.1]
[1, 1, 1]
points_mean: [1, -nan, 1.1]
points[0] normalized: [0, -nan, -0.1]
points[0] in Optimize: [0, -nan, -0.1]

Those nan's shouldn't be there.

You may also try in debug mode cd build; cmake -DCMAKE_BUILD_TYPE=Debug ..; make -j; cd ..; python test.py

The output as as it should be:

[1, 1, 1]
points_mean: [1, 1, 1]
points[0] normalized: [0, 0, 0]
points[0] in Optimize: [0, 0, 0]
[1, 1, 1]
points_mean: [1, 1, 1]
points[0] normalized: [0, 0, 0]
points[0] in Optimize: [0, 0, 0]
[1, 1, 1]
points_mean: [1, 1, 1]
points[0] normalized: [0, 0, 0]
points[0] in Optimize: [0, 0, 0]

Here is CMakeLists.txt

cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(test)

# Sets C++ version to 20.
set(CMAKE_CXX_STANDARD 20)

find_package(pybind11)
pybind11_add_module(test test.cc moby_ravelin.cc)

Here is test.cc

/**
 * TODO
 *   - Don't recalculate the grid each time. Store it in memory
 *   - Improve initialization by randomly sampling the grid and stopping if the result is good enough
 */

#include <iostream>
#include <chrono>
#include <fstream>
#include <vector>
#include <assert.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>

#include "moby_ravelin.h"

namespace py=pybind11;

// Runs the full optimization routine for fitting an obb to a point clouds
void Optimize(const std::vector<Vector3d>& points, const std::optional<Quatd>& init_quat = std::nullopt)
{
  std::cout << "points[0] in Optimize: " << points[0] << std::endl;
}

void Test(
  std::vector<Vector3d>* points, const std::optional<Quatd>& init_quat = std::nullopt)
{
  std::cout << (*points)[0] << std::endl;
  // Find the mean position of the points
  Vector3d points_mean{};
  for (const Vector3d& point: *points) {
    points_mean += point;
  }
  points_mean /= points->size();
  std::cout << "points_mean: " << points_mean << std::endl;
  for (Vector3d& point: *points) {
    point -= points_mean;
  }
  std::cout << "points[0] normalized: " << (*points)[0] << std::endl;
  Optimize(*points, init_quat);
}

void PyTest(py::array_t<double> points, std::optional<const std::array<double, 4>> init_quat = std::nullopt)
{
  // Convert points. We expect points to be a Nx3 array
  std::vector<Vector3d> vector_points{};
  for (int i = 0; i < points.shape(0); ++i) {
    vector_points.push_back(Vector3d(points.mutable_at(i, 0), points.mutable_at(i, 1), points.mutable_at(i, 2)));
  }

  if (init_quat.has_value()) {
    Test(&vector_points, Quatd(init_quat.value()));
  } else {
    Test(&vector_points, std::nullopt);
  }
}

PYBIND11_MODULE(test, m) {
  m.def(
    "test", &PyTest, py::arg("points"), py::arg("init_quat") = py::none()
  );
}

Here is test.py

import numpy as np

from build.test import test

points = np.ones(shape=(10, 3))

test(points)
test(points)
test(points)

Here is moby_ravelin.h

#pragma once

#include <cassert>
#include <cmath>
#include <algorithm>
#include <iterator>
#include <limits>


class Vector3d
{
  public:
    Vector3d() {};
    Vector3d(double x, double y, double z);
    friend std::ostream& operator<<(std::ostream& os, Vector3d v);
    Vector3d operator-() const { return Vector3d(-_data[0], -_data[1], -_data[2]); }
    Vector3d& set_zero() { _data[0] = _data[1] = _data[2] = 0.; return *this; }
    static double dot(const Vector3d& v1, const Vector3d& v2);
    static Vector3d cross(const Vector3d& v1, const Vector3d& v2);
    bool unit() const;
    double norm() const { return std::sqrt(norm_sq()); }
    double  norm_sq() const { return dot(*this, *this); }
    Vector3d abs() const { 
      Vector3d v{}; v.x() = std::fabs(this->x()); v.y() = std::fabs(this->y()); v.z() = std::fabs(this->z());
      return v; }
    double max() const { return std::max(std::max(x(), y()), z()); }
    void normalize() { assert(norm() > std::numeric_limits<double>::epsilon()); operator/=(norm()); }
    Vector3d& operator/=(double scalar) { _data[0] /= scalar; _data[1] /= scalar; _data[2] /= scalar; return *this; }
    Vector3d operator/(double scalar) const { Vector3d v{ *this }; v /= scalar; return v; }
    Vector3d& operator*=(double scalar) { _data[0] *= scalar; _data[1] *= scalar; _data[2] *= scalar; return *this; }
    Vector3d operator*(double scalar) const { Vector3d v{ *this }; v *= scalar; return v; }
    Vector3d& operator+=(const Vector3d& v) { _data[0] += v.x(), _data[1] += v.y(), _data[2] += v.z(); return *this; }
    Vector3d operator+(const Vector3d& v) const { Vector3d result{ *this }; result += v; return result; }
    Vector3d& operator-=(const Vector3d& v) { _data[0] -= v.x(), _data[1] -= v.y(), _data[2] -= v.z(); return *this; }
    Vector3d operator-(const Vector3d& v) const { Vector3d result{ *this }; result -= v; return result; }
    const double& x() const { return _data[0]; }
    double& x() { return _data[0]; }
    const double& y() const { return _data[1]; }
    double& y() { return _data[1]; }
    const double& z() const { return _data[2]; }
    double& z() { return _data[2]; }
    const double& operator[](const unsigned i) const;
    double& operator[](const unsigned i);
  private:
    double _data[3];
};

class Quatd;  // forward declare for Matrix3d operator=(const Quatd& q)

class Matrix3d
{
  public:
    Matrix3d() {};
    Matrix3d& operator=(const Quatd& q);
    static Matrix3d zero() { Matrix3d m; m.set_zero(); return m; }
    Matrix3d& set_zero() { std::fill_n(_data, 9, 0.); return *this; }
    Matrix3d& set_identity();
    Vector3d& get_column(unsigned i, Vector3d& result) const;
    Vector3d transpose_mult(const Vector3d& v) const;
    Vector3d mult(const Vector3d& v) const;
    Vector3d operator*(const Vector3d& v) const { Vector3d result{ mult(v) }; return result; }
    static Matrix3d identity() { Matrix3d m; m.set_identity(); return m; }
    // Get matrix elements. Notation is {row}{col}, so xz is the first row and the third column
    const double& xx() const { return _data[0]; }
    double& xx() { return _data[0]; }
    const double& xy() const { return _data[3]; }
    double& xy() { return _data[3]; }
    const double& xz() const { return _data[6]; }
    double& xz() { return _data[6]; }
    const double& yx() const { return _data[1]; }
    double& yx() { return _data[1]; }
    const double& yy() const { return _data[4]; }
    double& yy() { return _data[4]; }
    const double& yz() const { return _data[7]; }
    double& yz() { return _data[7]; }
    const double& zx() const { return _data[2]; }
    double& zx() { return _data[2]; }
    const double& zy() const { return _data[5]; }
    double& zy() { return _data[5]; }
    const double& zz() const { return _data[8]; }
    double& zz() { return _data[8]; }
  private:
    // Data layout is column-major
    double _data[9];
};

std::ostream& operator<<(std::ostream& os, Matrix3d m);

class Quatd
{
  public:
    Quatd();
    Quatd(const Vector3d& axis, double angle);
    Quatd(double x, double y, double z, double w);
    Quatd(std::array<double, 4> xyzw);
    Quatd& operator=(const Matrix3d& m);
    double magnitude() const;
    void conjugate();
    static Quatd conjugate(const Quatd& q);
    Quatd& invert();
    bool unit() const;
    void normalize();
    static Quatd normalize(const Quatd& q);
    static Quatd quaternionProduct(const Quatd& q1, const Quatd& q2);
    Vector3d operator*(const Vector3d& v) const;
    double x;  // i coefficient 
    double y;  // j coefficient
    double z;  // k coefficient
    double w;  // real part
  private:
    static double safe_sqrt(double x) { return (x <= 0.) ? 0. : std::sqrt(x); }
};

std::ostream& operator<<(std::ostream& os, Quatd& q);

class Transform3d
{
  public:
    Transform3d() = delete;  // Alex note: Still not sure why I would do this. Just following other implementations
    Transform3d(const Matrix3d& m, const Vector3d& v);
    Transform3d& invert();
    Transform3d inverse() const { return invert(*this); }
    static Transform3d invert(const Transform3d& t);
    Vector3d transform_point(const Vector3d& p) const;
    // The relative orientation
    Quatd q;
    // The relative origin
    Vector3d x;
};

class OBB {
  public:
    OBB();

    template <class ForwardIterator>
    void expand_to_fit(ForwardIterator begin, ForwardIterator end) 
    {
      const unsigned THREE_D = 3; 

      // Get the corners of the OBB as if it were an AABB
      Vector3d lo = -this->l, hi = this->l;

      // Process all points
      for (ForwardIterator it=begin; it != end; ++it)
      {
        // Get the point in the frame of the OBB
        Vector3d pt = R.transpose_mult(Vector3d(*it - this->center));
        for (unsigned i{ 0 }; i< THREE_D; ++i)
          if (pt[i] < lo[i])
            lo[i] = pt[i];
          else if (pt[i] > hi[i])
            hi[i] = pt[i];
      }

      // Half lo and hi
      lo *= 0.5;
      hi *= 0.5;

      // Transform the center of the box
      this->center += R * Vector3d(lo+hi);

      // set the new lengths of the box
      this->l = hi - lo;
    }

    Vector3d center;
    Vector3d l; // Half-lengths in x, y, z
    Matrix3d R; // Orientation of this OBB (euler angles? TODO)
    std::array<std::array<double, 3>, 8> get_vertices();
};

Here is moby_ravelin.cc

/**
 * Just pulling out the necessary bits from Moby and Ravelin
 */

#include <assert.h>
#include <cmath>
#include <iostream>
#include <vector>
#include <array>

#include "moby_ravelin.h"

/// Exception thrown when trying to access the index beyond the range of the data 
class InvalidIndexException : public std::runtime_error
{
  public:
    InvalidIndexException() : std::runtime_error("Invalid index") {}
}; // end class

// ========== Vector ==========

std::ostream& operator<<(std::ostream& os, Vector3d v) {
  os << "[" << v._data[0] << ", " << v._data[1] << ", " << v._data[2] << "]";
  return os;
}

Vector3d::Vector3d(double x, double y, double z): _data{ x, y, z } {};

// Computes the dot product between two vectors
double Vector3d::dot(const Vector3d& v1, const Vector3d& v2)
{
  return v1.x() * v2.x() + v1.y() * v2.y() + v1.z() * v2.z();
}

// Determines whether this is a unit vector
bool Vector3d::unit() const
{
  double norm = this->norm();
  return (std::fabs(norm - 1.) < std::sqrt(std::numeric_limits<double>::epsilon()));
}

// Computes the cross product between two vectors
Vector3d Vector3d::cross(const Vector3d& v1, const Vector3d& v2)
{
  Vector3d w;
  w[0] = v1[1] * v2[2] - v1[2] * v2[1];
  w[1] = v1[2] * v2[0] - v1[0] * v2[2];
  w[2] = v1[0] * v2[1] - v1[1] * v2[0];
  return w;
}

const double& Vector3d::operator[] (const unsigned i) const
{
  if (i >= 3) {
    throw InvalidIndexException();
  }
  return _data[i];
}

double& Vector3d::operator[](const unsigned i)
{
  if (i >= 3) {
    throw InvalidIndexException();
  }
  return _data[i];
}

// ========== Matrix ==========

std::ostream& operator<<(std::ostream& os, Matrix3d m) {
  os << "[\n  [" << m.xx() << ", " << m.xy() << ", " << m.xz() << "],\n  [" << m.yx() << ", " << m.yy() << ", "
    << m.yz() << "],\n  [" << m.zx() << ", " << m.zy() << ", " << m.zz() << "]\n]";
  return os;
}

Matrix3d& Matrix3d::set_identity()
{
  const unsigned XX = 0, YY = 4, ZZ = 8;
  xy() = xz() = yz() = yx() = zx() = zy() = 0.;
  _data[XX] = _data[YY] = _data[ZZ] = 1.;
  return *this;
}

Vector3d& Matrix3d::get_column(unsigned i, Vector3d& result) const
{
  const unsigned SZ = 3;
  assert(i < SZ);

  unsigned st = i*SZ;
  result.x() = _data[st++];
  result.y() = _data[st++];
  result.z() = _data[st];
  return result;
}

// Multiplies the transpose of this matrix by a vector and returns the result in a new vector
Vector3d Matrix3d::transpose_mult(const Vector3d& v) const
{
  Vector3d result;
  result.x() = _data[0]*v.x() + _data[1]*v.y() + _data[2]*v.z();
  result.y() = _data[3]*v.x() + _data[4]*v.y() + _data[5]*v.z();  
  result.z() = _data[6]*v.x() + _data[7]*v.y() + _data[8]*v.z();  
  return result;
}

// Multiplies this matrix by a vector and returns the result in a new vector
Vector3d Matrix3d::mult(const Vector3d& o) const
{
  Vector3d result;
  result.x() = _data[0]*o.x() + _data[3]*o.y() + _data[6]*o.z();
  result.y() = _data[1]*o.x() + _data[4]*o.y() + _data[7]*o.z();  
  result.z() = _data[2]*o.x() + _data[5]*o.y() + _data[8]*o.z();  
  return result;
}

// Assigns quaternion by expressing it as a 3d rotation matrix
Matrix3d& Matrix3d::operator=(const Quatd& q)
{
  // Verify that the quaternion is normalized
  assert(q.unit());
  // Set up repeated products
  const double xx = q.x*q.x;
  const double xy = q.x*q.y;
  const double xz = q.x*q.z;
  const double xw = q.x*q.w;
  const double yy = q.y*q.y;
  const double yz = q.y*q.z;
  const double yw = q.y*q.w;
  const double zz = q.z*q.z;
  const double zw = q.z*q.w; 
  const double ww = q.w*q.w;

  Matrix3d& m = *this;
  m.xx() = 2*(xx + ww) - 1;
  m.xy() = 2*(xy - zw);
  m.xz() = 2*(xz + yw);
  m.yx() = 2*(xy + zw);
  m.yy() = 2*(yy + ww) - 1;
  m.yz() = 2*(yz - xw);
  m.zx() = 2*(xz - yw);
  m.zy() = 2*(yz + xw);
  m.zz() = 2*(zz + ww) - 1;
  return m;
}


// ========== Quat ==========

std::ostream& operator<<(std::ostream& os, Quatd& q) {
  os << "x: " << q.x << ", y: " << q.y << ", z: " << q.z << ", w: " << q.w;
  return os;
}

// Quaternion constructed as [0, 0, 0] 1
Quatd::Quatd(): x{ 0. }, y{ 0. }, z{ 0. }, w { 1.0 } {};

// Construct quaternion by specifying x, y, z, w
Quatd::Quatd(double x, double y, double z, double w): x{ x }, y{ y }, z{ z }, w{ w } {};

// Construct quaternion specifing an array with {x, y, z, w}
Quatd::Quatd(std::array<double, 4> xyzw): x{ xyzw[0] }, y{ xyzw[1] }, z{ xyzw[2] }, w{ xyzw[3] } {};

// Constructs a quaternion representing a roation of angle about axis
Quatd::Quatd(const Vector3d& axis, double angle)
{
  double sinHalfTheta{ std::sin(angle/2) };
  // note: half angle because of rotation is qpq*
  x = sinHalfTheta*axis[0];
  y = sinHalfTheta*axis[1];
  z = sinHalfTheta*axis[2];
  w = std::cos(angle/2);
}

double Quatd::magnitude() const
{
  return safe_sqrt(x*x + y*y + z*z + w*w);
}

// Determines whether this is a unit quaternion
bool Quatd::unit() const
{
  double mag = magnitude();
  return (std::fabs(mag - 1.) < std::sqrt(std::numeric_limits<double>::epsilon()));
}

// Negates the value of each of i, j, k coefficients in place
void Quatd::conjugate()
{
  *this = conjugate(*this);
}

// Negates the value of each of i, j, k coefficients and returns the resulting quaternion
Quatd Quatd::conjugate(const Quatd& q)
{
  return Quatd(-q.x, -q.y, -q.z, q.w);
}

// Inverts the quaternion in place and return a reference to it
Quatd& Quatd::invert()
{
  conjugate();
  return *this;
}

// Normalizes the quaternion in place
void Quatd::normalize()
{
  double mag = magnitude();
  if (mag != 0.) {
    double maginv = 1. / mag;
    x *= maginv;
    y *= maginv;
    z *= maginv;
    w *= maginv;
  } else {
    x = y = z = 0;
    w = 1;
  }
}

// Computes the normalized quaternion of q and returns it
Quatd Quatd::normalize(const Quatd& q)
{
  Quatd qn(q);
  qn.normalize();
  return qn;
}

// Computes quaternion product
Quatd Quatd::quaternionProduct(const Quatd& q1, const Quatd& q2)
{
  Quatd qp;
  qp.w = q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z;
  qp.x = q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y;
  qp.y = q1.w * q2.y + q1.y * q2.w + q1.z * q2.x - q1.x * q2.z;
  qp.z = q1.w * q2.z + q1.z * q2.w + q1.x * q2.y - q1.y * q2.x;
  return qp;
}

// Converts a rotation matrix to a unit Quaternion
Quatd& Quatd::operator=(const Matrix3d& m)
{
  // core computation
  Quatd& q = *this;
  q.w = std::sqrt(std::max((double) 0.0, (double) 1.0 + m.xx() + m.yy() + m.zz())) * (double) 0.5;
  q.x = std::sqrt(std::max((double) 0.0, (double) 1.0 + m.xx() - m.yy() - m.zz())) * (double) 0.5;
  q.y = std::sqrt(std::max((double) 0.0, (double) 1.0 - m.xx() + m.yy() - m.zz())) * (double) 0.5;
  q.z = std::sqrt(std::max((double) 0.0, (double) 1.0 - m.xx() - m.yy() + m.zz())) * (double) 0.5;

  // sign computation
  if (m.zy() - m.yz() < (double) 0.0)
    q.x = -q.x;
  if (m.xz() - m.zx() < (double) 0.0)
    q.y = -q.y;
  if (m.yx() - m.xy() < (double) 0.0)
    q.z = -q.z;

  #ifndef NDEBUG
  if (!q.unit())
    std::cerr << "QUAT::set() warning!  not a unit quaternion!" << std::endl;
  #endif

  return q;
}

//Multiplies the 3x3 matrix corresponding to this quaternion by a 3D vector and returns the result in a new 3d vector
Vector3d Quatd::operator*(const Vector3d& v) const
{
  const double w2 = w*w;
  const double x2 = x*x;
  const double y2 = y*y;
  const double z2 = z*z;
  const double xy = x*y;
  const double xz = x*z;
  const double yz = y*z;
  const double xw = x*w;
  const double yw = y*w;
  const double zw = z*w;
  return Vector3d((-1.0+2.0*(w2+x2))*v.x() + 2.0*((xy-zw)*v.y() + (yw+xz)*v.z()), 2.0*((xy+zw)*v.x() + (-xw+yz)*v.z())
                   + (-1.0+2.0*(w2+y2))*v.y(), 2.0*((-yw+xz)*v.x() + (xw+yz)*v.y()) + (-1.0+2.0*(w2+z2))*v.z());
}

// ========== Transform ==========

// Constructs transformation from rotation matrix and translation vector
Transform3d::Transform3d(const Matrix3d& m, const Vector3d& v):
  x{ v } 
{
  q = m;  // Not in the member initializer list because we are calling operator= instead of a constructor
}

Transform3d& Transform3d::invert()
{
  // Invert the quaternion
  q.invert();

  // Determine the new translation
  x = q * (-x);

  // todo: do I need this?
  // swap the source and target poses
  // std::swap(source, target);

  return *this;
}

// Invert the provided transformation and return a new resulting transformation
Transform3d Transform3d::invert(const Transform3d& t)
{
  return Transform3d(t).invert();
}

// Apply the transformation to a point
Vector3d Transform3d::transform_point(const Vector3d& p) const
{
  return Vector3d(q * Vector3d(p) + x);
}



// ========== OBB ==========

OBB::OBB()
{
  R = Matrix3d::identity();
  center.set_zero();
  l.set_zero();
}

std::array<std::array<double, 3>, 8> OBB::get_vertices()
{
  const unsigned X = 0, Y = 1, Z = 2;

  // Get the three axes of the OBB, scaled by axis lengths
  Vector3d axis1{};
  axis1 = this->R.get_column(X, axis1); 
  Vector3d axis2{};
  axis2 = this->R.get_column(Y, axis2);
  Vector3d axis3{};
  axis3 = this->R.get_column(Z, axis3); 
  axis1 *= this->l[X];
  axis2 *= this->l[Y];
  axis3 *= this->l[Z];

  std::array<std::array<double, 3>, 8> vertices;

  std::vector<int> coeffs{-1, 1};
  int ix{0};
  for (int i: coeffs) {
    for (int j: coeffs) {
      for (int k: coeffs) {
        Vector3d corner = this->center + axis1 * i + axis2 * j + axis3 * k;
        std::array<double, 3> vertex{corner.x(), corner.y(), corner.z()};
        vertices[ix] = vertex;
        ix++;
      }
    }
  }
  return vertices;
}

Here is how I got pybind:

git clone https://github.com/pybind/pybind11.git
cd pybind11
git checkout ffa346860b306c9bbfb341aed9c14c067751feb8
mkdir build; cd build
cmake -DCMAKE_INSTALL_PREFIX=/usr/local ..
make -j
sudo make install

Python version is 3.8.10

Output of 'uname -a' is Linux alexander-dlrig 5.13.0-40-generic #45~20.04.1-Ubuntu SMP Mon Apr 4 09:38:31 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux

May 10 '22 10:05 alexander-soare

pybind11 pybind11 copied to clipboard

[BUG]: Memory issues when building in release mode

Required prerequisites

Problem description

Reproducible example code

pybind11
pybind11 copied to clipboard