robot-grasp-detection icon indicating copy to clipboard operation
robot-grasp-detection copied to clipboard

I am new to TensorFlow and CNNs. How can I predict a grasp for an image using an existing model? It seems that this repository only includes training code. Thank you.

Open soldatjiang opened this issue 6 years ago • 13 comments

soldatjiang avatar Apr 16 '18 09:04 soldatjiang

Did you solve this problem?

woshisj avatar Apr 27 '18 01:04 woshisj

@woshisj I finally figured out the right way to predict the bbox from an existing model; the code goes like this.

import tensorflow as tf
from grasp_inf import inference
from grasp_det import grasp_to_bbox
import cv2

# Path of the PNG image to run grasp prediction on; the script below reads it
# both as raw bytes for the TensorFlow graph and with cv2.imread for display.
filename = '../cornell_grasping_dataset/01/pcd0159r.png'

def draw_bbox(img, bbox, x_scale=0.35, y_scale=0.47, color=(0, 0, 255)):
    """Draw the closed four-sided outline described by ``bbox`` onto ``img``.

    Args:
        img: Image handed straight to ``cv2.line``; it is drawn on in place.
        bbox: Indexable holding four corners. ``bbox[i][0]`` and
            ``bbox[i][1]`` are the x and y of corner ``i`` and must be
            convertible with ``float()``.
        x_scale: Divisor applied to every x coordinate before drawing.
        y_scale: Divisor applied to every y coordinate before drawing.
        color: Line color passed to ``cv2.line``.

    Returns:
        None. The only effect is the four lines drawn on ``img``.

    The defaults reproduce the values that used to be hard-coded, so
    existing two-argument calls behave exactly as before.

    NOTE(review): 0.35 and 0.47 look like 224/640 and 224/480, i.e. a mapping
    from the 224x224 network input back to a 640x480 image -- confirm. Because
    the two divisors differ, a rotated rectangle is stretched unevenly.
    """
    # Scale every corner to integer pixel coordinates first, then draw.
    corners = [
        (int(float(bbox[i][0]) / x_scale), int(float(bbox[i][1]) / y_scale))
        for i in range(4)
    ]

    # Pair each corner with its successor; the last pair closes the outline.
    for start, end in zip(corners, corners[1:] + corners[:1]):
        cv2.line(img, start, end, color)


if __name__ == '__main__':
    # Demo: predict a grasp rectangle for one image with a trained checkpoint
    # and display it on top of the original image.

    # This initializer is created before inference() is called, so it can only
    # cover variables that already exist at this point; the network weights
    # are supplied by the checkpoint restore further down.
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    # Read the encoded PNG bytes; the context manager closes the file handle.
    with tf.gfile.FastGFile(filename, 'rb') as image_file:
        img_raw_data = image_file.read()

    # Separate OpenCV copy of the image, used only for drawing and display.
    img_show = cv2.imread(filename)
    if img_show is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('cv2.imread could not load image: ' + filename)

    # Graph input: decode, convert to float32, resize to 224x224 and add a
    # batch dimension. channels=3 forces three channels so the fixed-shape
    # reshape below also works for grayscale or RGBA PNG files.
    # NOTE(review): this preprocessing should match what the model was
    # trained with -- confirm against the training pipeline.
    img_data = tf.image.decode_png(img_raw_data, channels=3)
    img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32)
    img_reshape = tf.image.resize_images(img_data, [224, 224])
    img_reshape = tf.reshape(img_reshape, shape=[1, 224, 224, 3])

    # The network output is unstacked along axis 1 into five values.
    x_hat, y_hat, tan_hat, w_hat, h_hat = tf.unstack(inference(img_reshape), axis=1)
    # grasp_to_bbox is called with h before w, unlike the unstack order above;
    # presumably this matches its signature -- confirm.
    bbox_hat = grasp_to_bbox(x_hat, y_hat, tan_hat, h_hat, w_hat)

    # Names under which the weights are looked up in the graph and restored
    # from the checkpoint.
    lg = ['w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4', 'w5', 'b5', 'w_fc1', 'b_fc1', 'w_fc2', 'b_fc2', 'w_output', 'b_output']

    # Index the graph variables once instead of rescanning the collection for
    # every name, and fail with the list of names that are absent.
    graph_vars = {v.name: v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)}
    missing = [name for name in lg if name + ':0' not in graph_vars]
    if missing:
        raise ValueError('variables not found in graph: ' + ', '.join(missing))
    dg = {name: graph_vars[name + ':0'] for name in lg}

    saver_g = tf.train.Saver(dg)

    # The session is closed as soon as the prediction has been fetched.
    with tf.Session() as sess:
        sess.run(init_op)
        saver_g.restore(sess, './models/grasp/m4/m4.ckpt')
        bbox_model = sess.run(bbox_hat)

    draw_bbox(img_show, bbox_model)
    cv2.imshow('bbox', img_show)
    cv2.waitKey(0)

soldatjiang avatar May 04 '18 08:05 soldatjiang

@woshisj I finally figured out the right way to predict the bbox from an existing model; the code goes like this.

import tensorflow as tf
from grasp_inf import inference
from grasp_det import grasp_to_bbox
import cv2

# Path of the PNG image to run grasp prediction on; the script below reads it
# both as raw bytes for the TensorFlow graph and with cv2.imread for display.
filename = '../cornell_grasping_dataset/01/pcd0159r.png'

def draw_bbox(img, bbox, x_scale=0.35, y_scale=0.47, color=(0, 0, 255)):
    """Draw the closed four-sided outline described by ``bbox`` onto ``img``.

    Args:
        img: Image handed straight to ``cv2.line``; it is drawn on in place.
        bbox: Indexable holding four corners. ``bbox[i][0]`` and
            ``bbox[i][1]`` are the x and y of corner ``i`` and must be
            convertible with ``float()``.
        x_scale: Divisor applied to every x coordinate before drawing.
        y_scale: Divisor applied to every y coordinate before drawing.
        color: Line color passed to ``cv2.line``.

    Returns:
        None. The only effect is the four lines drawn on ``img``.

    The defaults reproduce the values that used to be hard-coded, so
    existing two-argument calls behave exactly as before.

    NOTE(review): 0.35 and 0.47 look like 224/640 and 224/480, i.e. a mapping
    from the 224x224 network input back to a 640x480 image -- confirm. Because
    the two divisors differ, a rotated rectangle is stretched unevenly.
    """
    # Scale every corner to integer pixel coordinates first, then draw.
    corners = [
        (int(float(bbox[i][0]) / x_scale), int(float(bbox[i][1]) / y_scale))
        for i in range(4)
    ]

    # Pair each corner with its successor; the last pair closes the outline.
    for start, end in zip(corners, corners[1:] + corners[:1]):
        cv2.line(img, start, end, color)


if __name__ == '__main__':
    # Demo: predict a grasp rectangle for one image with a trained checkpoint
    # and display it on top of the original image.

    # This initializer is created before inference() is called, so it can only
    # cover variables that already exist at this point; the network weights
    # are supplied by the checkpoint restore further down.
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    # Read the encoded PNG bytes; the context manager closes the file handle.
    with tf.gfile.FastGFile(filename, 'rb') as image_file:
        img_raw_data = image_file.read()

    # Separate OpenCV copy of the image, used only for drawing and display.
    img_show = cv2.imread(filename)
    if img_show is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('cv2.imread could not load image: ' + filename)

    # Graph input: decode, convert to float32, resize to 224x224 and add a
    # batch dimension. channels=3 forces three channels so the fixed-shape
    # reshape below also works for grayscale or RGBA PNG files.
    # NOTE(review): this preprocessing should match what the model was
    # trained with -- confirm against the training pipeline.
    img_data = tf.image.decode_png(img_raw_data, channels=3)
    img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32)
    img_reshape = tf.image.resize_images(img_data, [224, 224])
    img_reshape = tf.reshape(img_reshape, shape=[1, 224, 224, 3])

    # The network output is unstacked along axis 1 into five values.
    x_hat, y_hat, tan_hat, w_hat, h_hat = tf.unstack(inference(img_reshape), axis=1)
    # grasp_to_bbox is called with h before w, unlike the unstack order above;
    # presumably this matches its signature -- confirm.
    bbox_hat = grasp_to_bbox(x_hat, y_hat, tan_hat, h_hat, w_hat)

    # Names under which the weights are looked up in the graph and restored
    # from the checkpoint.
    lg = ['w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4', 'w5', 'b5', 'w_fc1', 'b_fc1', 'w_fc2', 'b_fc2', 'w_output', 'b_output']

    # Index the graph variables once instead of rescanning the collection for
    # every name, and fail with the list of names that are absent.
    graph_vars = {v.name: v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)}
    missing = [name for name in lg if name + ':0' not in graph_vars]
    if missing:
        raise ValueError('variables not found in graph: ' + ', '.join(missing))
    dg = {name: graph_vars[name + ':0'] for name in lg}

    saver_g = tf.train.Saver(dg)

    # The session is closed as soon as the prediction has been fetched.
    with tf.Session() as sess:
        sess.run(init_op)
        saver_g.restore(sess, './models/grasp/m4/m4.ckpt')
        bbox_model = sess.run(bbox_hat)

    draw_bbox(img_show, bbox_model)
    cv2.imshow('bbox', img_show)
    cv2.waitKey(0)

Does it work well? I used the code to predict on an image, both from the raw data and my own image, but I always get a terrible result. And I just don't know why...

lx-onism avatar Oct 11 '18 03:10 lx-onism

@lx-onism Maybe it's a problem with the initial weights.

soldatjiang avatar Oct 11 '18 07:10 soldatjiang

@lx-onism What does raw data mean?

soldatjiang avatar Oct 11 '18 07:10 soldatjiang

@lx-onism What does raw data mean?

Hahaha, it means the images of the Cornell Grasping Dataset.

lx-onism avatar Oct 18 '18 00:10 lx-onism

@lx-onism Maybe it's a problem with the initial weights.

why? I use the given model and I think the weights may not be modified.

lx-onism avatar Oct 18 '18 00:10 lx-onism

@soldatjiang Thank you for sharing the way to predict the bbox; the result is OK. However, why does the bbox look like a parallelogram?

jinhuan-hit avatar Mar 22 '19 13:03 jinhuan-hit

@jinhuan-hit Did you successfully predict the bbox with the way provided by soldatjiang? Could you show me the project code, THANK YOU SO MUCH.

oslo71 avatar Mar 31 '20 15:03 oslo71

@woshisj I finally figured out the right way to predict the bbox from an existing model; the code goes like this.

import tensorflow as tf
from grasp_inf import inference
from grasp_det import grasp_to_bbox
import cv2

# Path of the PNG image to run grasp prediction on; the script below reads it
# both as raw bytes for the TensorFlow graph and with cv2.imread for display.
filename = '../cornell_grasping_dataset/01/pcd0159r.png'

def draw_bbox(img, bbox, x_scale=0.35, y_scale=0.47, color=(0, 0, 255)):
    """Draw the closed four-sided outline described by ``bbox`` onto ``img``.

    Args:
        img: Image handed straight to ``cv2.line``; it is drawn on in place.
        bbox: Indexable holding four corners. ``bbox[i][0]`` and
            ``bbox[i][1]`` are the x and y of corner ``i`` and must be
            convertible with ``float()``.
        x_scale: Divisor applied to every x coordinate before drawing.
        y_scale: Divisor applied to every y coordinate before drawing.
        color: Line color passed to ``cv2.line``.

    Returns:
        None. The only effect is the four lines drawn on ``img``.

    The defaults reproduce the values that used to be hard-coded, so
    existing two-argument calls behave exactly as before.

    NOTE(review): 0.35 and 0.47 look like 224/640 and 224/480, i.e. a mapping
    from the 224x224 network input back to a 640x480 image -- confirm. Because
    the two divisors differ, a rotated rectangle is stretched unevenly.
    """
    # Scale every corner to integer pixel coordinates first, then draw.
    corners = [
        (int(float(bbox[i][0]) / x_scale), int(float(bbox[i][1]) / y_scale))
        for i in range(4)
    ]

    # Pair each corner with its successor; the last pair closes the outline.
    for start, end in zip(corners, corners[1:] + corners[:1]):
        cv2.line(img, start, end, color)


if __name__ == '__main__':
    # Demo: predict a grasp rectangle for one image with a trained checkpoint
    # and display it on top of the original image.

    # This initializer is created before inference() is called, so it can only
    # cover variables that already exist at this point; the network weights
    # are supplied by the checkpoint restore further down.
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

    # Read the encoded PNG bytes; the context manager closes the file handle.
    with tf.gfile.FastGFile(filename, 'rb') as image_file:
        img_raw_data = image_file.read()

    # Separate OpenCV copy of the image, used only for drawing and display.
    img_show = cv2.imread(filename)
    if img_show is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('cv2.imread could not load image: ' + filename)

    # Graph input: decode, convert to float32, resize to 224x224 and add a
    # batch dimension. channels=3 forces three channels so the fixed-shape
    # reshape below also works for grayscale or RGBA PNG files.
    # NOTE(review): this preprocessing should match what the model was
    # trained with -- confirm against the training pipeline.
    img_data = tf.image.decode_png(img_raw_data, channels=3)
    img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32)
    img_reshape = tf.image.resize_images(img_data, [224, 224])
    img_reshape = tf.reshape(img_reshape, shape=[1, 224, 224, 3])

    # The network output is unstacked along axis 1 into five values.
    x_hat, y_hat, tan_hat, w_hat, h_hat = tf.unstack(inference(img_reshape), axis=1)
    # grasp_to_bbox is called with h before w, unlike the unstack order above;
    # presumably this matches its signature -- confirm.
    bbox_hat = grasp_to_bbox(x_hat, y_hat, tan_hat, h_hat, w_hat)

    # Names under which the weights are looked up in the graph and restored
    # from the checkpoint.
    lg = ['w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4', 'w5', 'b5', 'w_fc1', 'b_fc1', 'w_fc2', 'b_fc2', 'w_output', 'b_output']

    # Index the graph variables once instead of rescanning the collection for
    # every name, and fail with the list of names that are absent.
    graph_vars = {v.name: v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)}
    missing = [name for name in lg if name + ':0' not in graph_vars]
    if missing:
        raise ValueError('variables not found in graph: ' + ', '.join(missing))
    dg = {name: graph_vars[name + ':0'] for name in lg}

    saver_g = tf.train.Saver(dg)

    # The session is closed as soon as the prediction has been fetched.
    with tf.Session() as sess:
        sess.run(init_op)
        saver_g.restore(sess, './models/grasp/m4/m4.ckpt')
        bbox_model = sess.run(bbox_hat)

    draw_bbox(img_show, bbox_model)
    cv2.imshow('bbox', img_show)
    cv2.waitKey(0)

Hello, where can I find the Cornell dataset? The link to it in this GitHub repository has expired. Thanks a lot.

Running-Chen avatar Jan 12 '21 13:01 Running-Chen

cv2.error: OpenCV(4.5.1) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-cl8wq7nq\opencv\modules\highgui\src\window.cpp:651: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'

May I ask how to solve this problem?

zhoumo1121 avatar Jun 23 '21 02:06 zhoumo1121

cv2.error: OpenCV(4.5.1) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-cl8wq7nq\opencv\modules\highgui\src\window.cpp:651: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'

May I ask how to solve this problem?

@zhoumo1121 Maybe it's a problem with the OpenCV version.

soldatjiang avatar Jul 02 '21 03:07 soldatjiang

ValueError: Tensor conversion requested dtype int32 for Tensor with dtype float64: 'Tensor("truediv:0", shape=(), dtype=float64, device=/device:CPU:0)' May I ask how to solve this problem? Thanks, I've been working on it for a long time!

zhoumo1121 avatar Jul 02 '21 04:07 zhoumo1121