cugraph icon indicating copy to clipboard operation
cugraph copied to clipboard

CuGraph hungarian matching slower than CPU based SciPy implemenation

Open JamesMcCullochDickens opened this issue 1 year ago • 2 comments

I have to compute a lot of hungarian matchings between sets of points using their distance as the matching criterion. So far I have tried this hybrid Scipy (CPU) and PyTorch method:

import numpy as np
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist
import torch

def torch_cdist(p1: np.ndarray, p2: np.ndarray) -> np.ndarray:
    p1 = torch.tensor(p1).to(0)
    p2 = torch.tensor(p2).to(0)
    cdist = torch.cdist(p1, p2, p=2)
    return cdist.cpu().numpy()


def hungarian_matching_pcds(points1: np.ndarray, points2: np.ndarray, to_gpu: bool = False) -> List[Tuple]:
    if to_gpu:
        cost_matrix = torch_cdist(points1, points2)
    else:
        cost_matrix = cdist(points1, points2)
    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    matches = list(zip(row_ind, col_ind))
    return matches

As well as using cugraph and pylibraft

import numpy as np
import cupy as cp
import cugraph
from pylibraft.distance import pairwise_distance
import cudf
def create_bipartite_graph(weights: cp.ndarray) -> cugraph.Graph:
    N = weights.shape[0]
    x = cp.arange(N)
    y = cp.arange(N)
    destinations, sources = cp.meshgrid(x, y, indexing='ij')
    destinations = cp.ravel(destinations) + N
    sources = cp.ravel(sources)
    weights_r = cp.array(cp.ravel(weights), dtype=cp.float32)
    data = {"source": sources,
            "destination": destinations,
            "weights": weights_r}
    df = cudf.DataFrame(data)
    G = cugraph.Graph()
    G.from_cudf_edgelist(input_df=df, edge_attr="weights")
    return G


def get_hungarian_matches_cugraph(pcd1: np.ndarray, pcd2: np.ndarray) -> np.ndarray:
    num_points = pcd1.shape[0]
    pcd1_cp, pcd2_cp = cp.asarray(pcd1), cp.asarray(pcd2)
    distances_cp = cp.array(pairwise_distance(pcd1_cp, pcd2_cp))
    graph = create_bipartite_graph(distances_cp)
    workers = cudf.Series(cp.arange(num_points), dtype='int64')
    cost, df = cugraph.hungarian(graph, workers)
    idx1 = np.expand_dims(cp.asnumpy(df.values[:, 0]), axis=-1)
    idx2 = np.expand_dims(cp.asnumpy(df.values[:, 1] - num_points), axis=-1)
    return np.concatenate([idx1, idx2], axis=-1)

I was expecting the cugraph approach to be much faster, but it is actually significantly slower. For instance for two sets of points of shape (1024, 3), the scipy approach takes about 0.03 seconds, but the cugraph approach takes about 1.3 seconds on my RTX 4090. I'm wondering if there is anything I am doing wrong to account for this?

JamesMcCullochDickens avatar Oct 06 '24 01:10 JamesMcCullochDickens