How to use parallel mode for video frame acquisition and object detection of industrial cameras
Search before asking
- [X] I have searched the YOLOv8 issues and discussions and found no similar questions.
Question
Hello. I am using serial detection in my current code: it first acquires an image frame and then runs object detection, and I find that this approach leads to high latency. I want to run video-frame acquisition and object detection in parallel — how should I modify my code to achieve this? My code is attached.
Additional
`# -- coding: utf-8 --
import sys import time from ctypes import * import datetime import numpy import cv2 from IMVApi import * from ultralytics import YOLO
def displayDeviceInfo(deviceInfoList):
    """Print a one-line summary (index, type, vendor, model, serial, user ID, IP) for each enumerated camera.

    `deviceInfoList` is the SDK's IMV_DeviceList: `nDevNum` devices reachable
    through `pDevInfo[i]`, whose text fields are fixed-size ctypes byte arrays.
    """

    def _decode(byte_field):
        # Join every element of the fixed-size byte array, matching the
        # original char-by-char concatenation (NUL padding included).
        # BUGFIX: the original used `str` as the loop variable, shadowing
        # the builtin.
        return "".join(chr(b) for b in byte_field)

    print("Idx Type Vendor Model S/N DeviceUserID IP Address")
    print("------------------------------------------------------------------------------------------------")
    for i in range(deviceInfoList.nDevNum):
        pDeviceInfo = deviceInfoList.pDevInfo[i]
        strVendorName = _decode(pDeviceInfo.vendorName)
        strModeName = _decode(pDeviceInfo.modelName)
        strSerialNumber = _decode(pDeviceInfo.serialNumber)
        strCameraname = _decode(pDeviceInfo.cameraName)
        strIpAdress = _decode(pDeviceInfo.DeviceSpecificInfo.gigeDeviceInfo.ipAddress)
        if pDeviceInfo.nCameraType == typeGigeCamera:
            strType = "Gige"
        elif pDeviceInfo.nCameraType == typeU3vCamera:
            strType = "U3V"
        else:
            strType = ""  # unknown interface type — print an empty column
        print("[%d] %s %s %s %s %s %s" % (
            i + 1, strType, strVendorName, strModeName, strSerialNumber, strCameraname, strIpAdress))
if __name__ == "__main__":
    # BUGFIX: markdown mangled the original guard to `if name == "main":`;
    # restored to the standard entry-point check.
    model = YOLO(r"runs/detect/train5-Fasternet2/weights/best.pt")
    deviceList = IMV_DeviceList()
    interfaceType = IMV_EInterfaceType.interfaceTypeAll
    stRecordParam = IMV_RecordParam()  # NOTE(review): unused below — kept for parity with the original
    nWidth = c_uint()                  # NOTE(review): unused below
    nHeight = c_uint()                 # NOTE(review): unused below

    # Enumerate attached cameras (GigE + U3V).
    nRet = MvCamera.IMV_EnumDevices(deviceList, interfaceType)
    if IMV_OK != nRet:
        print("Enumeration devices failed! ErrorCode", nRet)
        sys.exit()
    if deviceList.nDevNum == 0:
        print("find no device!")
        sys.exit()
    print("deviceList size is", deviceList.nDevNum)
    displayDeviceInfo(deviceList)

    nConnectionNum = input("Please input the camera index: ")
    if int(nConnectionNum) > deviceList.nDevNum:
        print("input error!")  # BUGFIX: typo "intput" in the original message
        sys.exit()

    cam = MvCamera()
    # Create a device handle for the selected (1-based) camera index.
    nRet = cam.IMV_CreateHandle(IMV_ECreateHandleMode.modeByIndex, byref(c_void_p(int(nConnectionNum) - 1)))
    if IMV_OK != nRet:
        print("Create devHandle failed! ErrorCode", nRet)
        sys.exit()

    # Open the camera.
    nRet = cam.IMV_Open()
    if IMV_OK != nRet:
        print("Open devHandle failed! ErrorCode", nRet)
        sys.exit()

    # Feature setup: free-running acquisition (trigger mode off).
    nRet = cam.IMV_SetEnumFeatureSymbol("TriggerSource", "Software")
    if IMV_OK != nRet:
        print("Set triggerSource value failed! ErrorCode[%d]" % nRet)
        sys.exit()
    nRet = cam.IMV_SetEnumFeatureSymbol("TriggerSelector", "FrameStart")
    if IMV_OK != nRet:
        print("Set triggerSelector value failed! ErrorCode[%d]" % nRet)
        sys.exit()
    nRet = cam.IMV_SetEnumFeatureSymbol("TriggerMode", "Off")
    if IMV_OK != nRet:
        print("Set triggerMode value failed! ErrorCode[%d]" % nRet)
        sys.exit()

    # Start streaming.
    nRet = cam.IMV_StartGrabbing()
    if IMV_OK != nRet:
        print("Start grabbing failed! ErrorCode", nRet)
        sys.exit()

    isGrab = True
    start_time = time.time()
    frame_count = 0
    window_name = 'Balls'
    # Create the display window once, not on every loop iteration.
    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

    while isGrab:
        # Actively pull one frame with a 1 s timeout.
        frame = IMV_Frame()
        stPixelConvertParam = IMV_PixelConvertParam()
        nRet = cam.IMV_GetFrame(frame, 1000)
        if IMV_OK != nRet:
            print("getFrame fail! Timeout:[1000]ms")
            continue
        print("getFrame success BlockId = [" + str(frame.frameInfo.blockId) + "], get frame time: " + str(
            datetime.datetime.now()))
        if not frame.pData:
            # BUGFIX: the original tested `None == byref(frame)`, which can
            # never be true; check the data pointer instead.
            print("pFrame is NULL!")
            continue

        # Fill in the pixel-conversion parameters.
        if IMV_EPixelType.gvspPixelMono8 == frame.frameInfo.pixelFormat:
            nDstBufSize = frame.frameInfo.width * frame.frameInfo.height
        else:
            nDstBufSize = frame.frameInfo.width * frame.frameInfo.height * 3
        pDstBuf = (c_ubyte * nDstBufSize)()
        memset(byref(stPixelConvertParam), 0, sizeof(stPixelConvertParam))
        stPixelConvertParam.nWidth = frame.frameInfo.width
        stPixelConvertParam.nHeight = frame.frameInfo.height
        stPixelConvertParam.ePixelFormat = frame.frameInfo.pixelFormat
        stPixelConvertParam.pSrcData = frame.pData
        stPixelConvertParam.nSrcDataLen = frame.frameInfo.size
        stPixelConvertParam.nPaddingX = frame.frameInfo.paddingX
        stPixelConvertParam.nPaddingY = frame.frameInfo.paddingY
        stPixelConvertParam.eBayerDemosaic = IMV_EBayerDemosaic.demosaicNearestNeighbor
        stPixelConvertParam.eDstPixelFormat = frame.frameInfo.pixelFormat
        stPixelConvertParam.pDstBuf = pDstBuf
        stPixelConvertParam.nDstBufSize = nDstBufSize

        if stPixelConvertParam.ePixelFormat == IMV_EPixelType.gvspPixelMono8:
            # Mono8 needs no conversion: copy the raw bytes out of the driver buffer.
            userBuff = c_buffer(b'\0', stPixelConvertParam.nDstBufSize)
            memmove(userBuff, stPixelConvertParam.pSrcData, stPixelConvertParam.nDstBufSize)
            cvImage = numpy.array(bytearray(userBuff)).reshape(stPixelConvertParam.nHeight,
                                                               stPixelConvertParam.nWidth)
        else:
            # Convert to BGR24 for OpenCV / YOLO.
            stPixelConvertParam.eDstPixelFormat = IMV_EPixelType.gvspPixelBGR8
            nRet = cam.IMV_PixelConvert(stPixelConvertParam)
            if IMV_OK != nRet:
                print("image convert to failed! ErrorCode[%d]" % nRet)
                cam.IMV_ReleaseFrame(frame)  # don't leak the driver buffer on the error path
                sys.exit()
            rgbBuff = c_buffer(b'\0', stPixelConvertParam.nDstBufSize)
            memmove(rgbBuff, stPixelConvertParam.pDstBuf, stPixelConvertParam.nDstBufSize)
            cvImage = numpy.array(bytearray(rgbBuff)).reshape(stPixelConvertParam.nHeight,
                                                              stPixelConvertParam.nWidth, 3)

        # BUGFIX: release the driver frame only AFTER the copy/convert above.
        # The original called IMV_ReleaseFrame first and then read pSrcData
        # from the already-released frame (use-after-free).
        nRet = cam.IMV_ReleaseFrame(frame)
        if IMV_OK != nRet:
            # BUGFIX: original passed nRet as a second print() argument
            # instead of formatting with %.
            print("Release frame failed! ErrorCode[%d]" % nRet)
            sys.exit()

        # Run detection on the converted frame and draw the results.
        results = model.predict(source=cvImage, line_width=1, imgsz=[1152, 1504])
        cvImage = results[0].plot()

        frame_count += 1
        elapsed_time = time.time() - start_time
        if elapsed_time >= 1.0:
            # Cumulative average FPS since start-up (counters are never reset,
            # mirroring the original behaviour).
            fps = frame_count / elapsed_time
            cv2.putText(cvImage, f"FPS: {int(fps)}", (40, 80), cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 3)
        cv2.imshow(window_name, cvImage)

        # Print the center coordinates of every detection.
        boxes = results[0].boxes.xywh.cpu()
        class_ids = results[0].boxes.cls.tolist()  # renamed: the original reused `cls` as the loop variable
        names = results[0].names
        for box, class_id in zip(boxes, class_ids):
            x, y, w, h = box
            name = names[class_id]
            print(f"{name}中心坐标为({x:.2f},{y:.2f}) ")

        # BUGFIX: poll the keyboard once per iteration — the original called
        # cv2.waitKey(1) twice, so a key press could be consumed by the first
        # call and missed by the second.
        if cv2.waitKey(1) >= 0:
            isGrab = False
            break

    # Stop streaming.
    nRet = cam.IMV_StopGrabbing()
    if IMV_OK != nRet:
        print("Stop grabbing failed! ErrorCode", nRet)
        sys.exit()
    # Close the camera.
    nRet = cam.IMV_Close()
    if IMV_OK != nRet:
        print("Close camera failed! ErrorCode", nRet)
        sys.exit()
    # Destroy the handle.
    if cam.handle:
        nRet = cam.IMV_DestroyHandle()
    print("---Demo end---")
@glenn-jocher Hi,could you help me to solve this question?
@Sparklexa hello! Thanks for reaching out. 😊 To achieve parallel processing of video frame acquisition and object detection, you might consider using Python's threading or multiprocessing modules. Here is a simplified approach to get you started:
import cv2
from threading import Thread
from queue import Queue
from ultralytics import YOLO
# Initialize model — load the YOLO weights once, up front, so both threads
# share a single model instance.
model = YOLO("path_to_your_model.pt")
# Queue for frames: FIFO handing frames from the grab thread to the
# inference thread.
# NOTE(review): with no maxsize the queue grows without bound whenever
# inference is slower than acquisition — consider Queue(maxsize=1) to always
# process the freshest frame.
frameQueue = Queue()
def grab_frames(cam, frameQueue):
    """Continuously read frames from `cam` and push each one onto `frameQueue`.

    Stops (returns) as soon as a read fails, e.g. at end of a video file or
    when the capture device is released.
    """
    # cam.read() never returns None, so this iterates until we break.
    for ok, img in iter(cam.read, None):
        if not ok:
            break
        frameQueue.put(img)
def process_frames(frameQueue, model):
    """Consume frames from `frameQueue`, run YOLO inference, and display the annotated result.

    Runs forever; intended to be the body of a worker thread.
    """
    while True:
        # BUGFIX: blocking get() instead of the original
        # `if not frameQueue.empty(): frameQueue.get()` pattern, which
        # busy-spins at 100% CPU while the queue is empty.
        frame = frameQueue.get()
        results = model.predict(source=frame)
        cv2.imshow('Frame', results[0].plot())
        cv2.waitKey(1)
if __name__ == '__main__':
    cam = cv2.VideoCapture(0)  # Update with your camera ID or video file
    # Thread for frame grabbing: producer side of frameQueue.
    t1 = Thread(target=grab_frames, args=(cam, frameQueue))
    t1.start()
    # Thread for frame processing: consumer side of frameQueue.
    t2 = Thread(target=process_frames, args=(frameQueue, model))
    t2.start()
    t1.join()
    # NOTE(review): process_frames loops forever, so this join() never
    # returns and the cleanup below is unreachable. A sentinel value pushed
    # onto the queue when grabbing ends (or making t2 a daemon thread) is
    # needed for a clean shutdown.
    t2.join()
    cam.release()
    cv2.destroyAllWindows()
This example uses Python's threading module to run frame grabbing and processing in parallel. Frames are grabbed in one thread and put in a Queue, while another thread fetches these frames for object detection. This setup should reduce the latency you're experiencing with serial detection. Just make sure to adjust "path_to_your_model.pt" and the camera source as needed. Feel free to tweak this example to better fit your application! 🚀
Let me know if you have any more questions or need further assistance.
👋 Hello there! We wanted to give you a friendly reminder that this issue has not had any recent activity and may be closed soon, but don't worry - you can always reopen it if needed. If you still have any questions or concerns, please feel free to let us know how we can help.
For additional resources and information, please see the links below:
- Docs: https://docs.ultralytics.com
- HUB: https://hub.ultralytics.com
- Community: https://community.ultralytics.com
Feel free to inform us of any other issues you discover or feature requests that come to mind in the future. Pull Requests (PRs) are also always welcomed!
Thank you for your contributions to YOLO 🚀 and Vision AI ⭐