robot-grasp-detection
robot-grasp-detection copied to clipboard
I am new to tensorflow and CNN, how can I predict a grasp of a image using existing model, it seems that this repository only include training code, thank you.
Did you solve this problem?
@woshisj I finally figured out the right way to predict bbox from existing model, the code goes like this.
import tensorflow as tf
from grasp_inf import inference
from grasp_det import grasp_to_bbox
import cv2
filename = '../cornell_grasping_dataset/01/pcd0159r.png'
def draw_bbox(img, bbox):
p1 = (int(float(bbox[0][0]) / 0.35), int(float(bbox[0][1]) / 0.47))
p2 = (int(float(bbox[1][0]) / 0.35), int(float(bbox[1][1]) / 0.47))
p3 = (int(float(bbox[2][0]) / 0.35), int(float(bbox[2][1]) / 0.47))
p4 = (int(float(bbox[3][0]) / 0.35), int(float(bbox[3][1]) / 0.47))
cv2.line(img, p1, p2, (0, 0, 255))
cv2.line(img, p2, p3, (0, 0, 255))
cv2.line(img, p3, p4, (0, 0, 255))
cv2.line(img, p4, p1, (0, 0, 255))
if __name__ == '__main__':
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess = tf.Session()
sess.run(init_op)
img_raw_data = tf.gfile.FastGFile(filename, 'rb').read()
img_show = cv2.imread(filename)
img_data = tf.image.decode_png(img_raw_data)
img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32)
img_reshape = tf.image.resize_images(img_data, [224, 224])
img_reshape = tf.reshape(img_reshape, shape=[1, 224, 224, 3])
x_hat, y_hat, tan_hat, w_hat, h_hat = tf.unstack(inference(img_reshape), axis=1)
bbox_hat = grasp_to_bbox(x_hat, y_hat, tan_hat, h_hat, w_hat)
dg={}
lg = ['w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4', 'w5', 'b5', 'w_fc1', 'b_fc1', 'w_fc2', 'b_fc2', 'w_output', 'b_output']
for i in lg:
dg[i] = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if v.name == i+':0'][0]
saver_g = tf.train.Saver(dg)
saver_g.restore(sess, './models/grasp/m4/m4.ckpt')
bbox_model = sess.run(bbox_hat)
draw_bbox(img_show, bbox_model)
cv2.imshow('bbox', img_show)
cv2.waitKey(0)
@woshisj I finally figured out the right way to predict bbox from existing model, the code goes like this.
import tensorflow as tf from grasp_inf import inference from grasp_det import grasp_to_bbox import cv2 filename = '../cornell_grasping_dataset/01/pcd0159r.png' def draw_bbox(img, bbox): p1 = (int(float(bbox[0][0]) / 0.35), int(float(bbox[0][1]) / 0.47)) p2 = (int(float(bbox[1][0]) / 0.35), int(float(bbox[1][1]) / 0.47)) p3 = (int(float(bbox[2][0]) / 0.35), int(float(bbox[2][1]) / 0.47)) p4 = (int(float(bbox[3][0]) / 0.35), int(float(bbox[3][1]) / 0.47)) cv2.line(img, p1, p2, (0, 0, 255)) cv2.line(img, p2, p3, (0, 0, 255)) cv2.line(img, p3, p4, (0, 0, 255)) cv2.line(img, p4, p1, (0, 0, 255)) if __name__ == '__main__': init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess = tf.Session() sess.run(init_op) img_raw_data = tf.gfile.FastGFile(filename, 'rb').read() img_show = cv2.imread(filename) img_data = tf.image.decode_png(img_raw_data) img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32) img_reshape = tf.image.resize_images(img_data, [224, 224]) img_reshape = tf.reshape(img_reshape, shape=[1, 224, 224, 3]) x_hat, y_hat, tan_hat, w_hat, h_hat = tf.unstack(inference(img_reshape), axis=1) bbox_hat = grasp_to_bbox(x_hat, y_hat, tan_hat, h_hat, w_hat) dg={} lg = ['w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4', 'w5', 'b5', 'w_fc1', 'b_fc1', 'w_fc2', 'b_fc2', 'w_output', 'b_output'] for i in lg: dg[i] = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if v.name == i+':0'][0] saver_g = tf.train.Saver(dg) saver_g.restore(sess, './models/grasp/m4/m4.ckpt') bbox_model = sess.run(bbox_hat) draw_bbox(img_show, bbox_model) cv2.imshow('bbox', img_show) cv2.waitKey(0)
Does it work well ? I use the code to predict an image which is either the raw data or my own image but always get a terrible result .And I just don't know why ...
@lx-onism Maybe it's the problem of inital weight.
@lx-onism What does raw data mean?
@lx-onism What does raw data mean?
hahaha,means the images of Cornell Grasping Dataset.
@lx-onism Maybe it's the problem of inital weight.
why? I use the given model and I think the weights may not be modified.
@soldatjiang Thank you for sharing the way to predict bbox and the result is ok.However,why is the bbox like parallelograms?
@jinhuan-hit Did you successfully predict the bbox with the way provided by soldatjiang? Could you show me the project code, THANK YOU SO MUCH.
@woshisj I finally figured out the right way to predict bbox from existing model, the code goes like this.
import tensorflow as tf from grasp_inf import inference from grasp_det import grasp_to_bbox import cv2 filename = '../cornell_grasping_dataset/01/pcd0159r.png' def draw_bbox(img, bbox): p1 = (int(float(bbox[0][0]) / 0.35), int(float(bbox[0][1]) / 0.47)) p2 = (int(float(bbox[1][0]) / 0.35), int(float(bbox[1][1]) / 0.47)) p3 = (int(float(bbox[2][0]) / 0.35), int(float(bbox[2][1]) / 0.47)) p4 = (int(float(bbox[3][0]) / 0.35), int(float(bbox[3][1]) / 0.47)) cv2.line(img, p1, p2, (0, 0, 255)) cv2.line(img, p2, p3, (0, 0, 255)) cv2.line(img, p3, p4, (0, 0, 255)) cv2.line(img, p4, p1, (0, 0, 255)) if __name__ == '__main__': init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess = tf.Session() sess.run(init_op) img_raw_data = tf.gfile.FastGFile(filename, 'rb').read() img_show = cv2.imread(filename) img_data = tf.image.decode_png(img_raw_data) img_data = tf.image.convert_image_dtype(img_data, dtype=tf.float32) img_reshape = tf.image.resize_images(img_data, [224, 224]) img_reshape = tf.reshape(img_reshape, shape=[1, 224, 224, 3]) x_hat, y_hat, tan_hat, w_hat, h_hat = tf.unstack(inference(img_reshape), axis=1) bbox_hat = grasp_to_bbox(x_hat, y_hat, tan_hat, h_hat, w_hat) dg={} lg = ['w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4', 'w5', 'b5', 'w_fc1', 'b_fc1', 'w_fc2', 'b_fc2', 'w_output', 'b_output'] for i in lg: dg[i] = [v for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) if v.name == i+':0'][0] saver_g = tf.train.Saver(dg) saver_g.restore(sess, './models/grasp/m4/m4.ckpt') bbox_model = sess.run(bbox_hat) draw_bbox(img_show, bbox_model) cv2.imshow('bbox', img_show) cv2.waitKey(0)
Hello,where could I find the dataset of Cornell,its link in this github is expired. Thanks a lot.
cv2.error: OpenCV(4.5.1) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-cl8wq7nq\opencv\modules\highgui\src\window.cpp:651: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'
May I ask how to solve this problem?
cv2.error: OpenCV(4.5.1) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-cl8wq7nq\opencv\modules\highgui\src\window.cpp:651: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'
May I ask how to solve this problem?
@zhoumo1121 Maybe it's the problem on OpenCV version.
ValueError: Tensor conversion requested dtype int32 for Tensor with dtype float64: 'Tensor("truediv:0", shape=(), dtype=float64, device=/device:CPU:0)' May I ask how to solve this problem? Thanks, I've been working on it for a long time!