[ALIENTEK STM32MP257 Development Board Trial] Object Recognition Based on the YOLO Model
This article presents a project that implements object recognition on the ALIENTEK STM32MP257 development board using the YOLOv5 model.
Project Overview
- Introduction to the YOLO model;
- Testing the official demo;
- Dynamic recognition: recognize objects in live video from a USB camera, with the results shown on the on-board LCD;
- Static recognition: load a local image, run on-board inference, and save the annotated result to a local path.
The YOLO Model
YOLOv5 is one of the releases in the YOLO family of object-detection algorithms. As a notable advance in computer vision, it delivered a significant jump in detection performance.

Like earlier versions in the series, YOLOv5 uses a single-stage detection strategy: a single forward pass both localizes and classifies objects. It introduces the CSPDarknet53 architecture as its backbone network to extract rich feature information, which effectively improves detection accuracy.
YOLOv5 also improves multi-scale detection, handling targets from small objects to large ones, which makes it perform well in diverse scenes.
In addition, data augmentation is applied extensively: random cropping, color transforms, and similar techniques increase the diversity of the training data and improve the model's generalization.
In terms of computational efficiency, YOLOv5 optimizes model size and speed while maintaining high detection performance, making it well suited to embedded devices, mobile platforms, and real-time applications. Its open-source nature lets researchers and developers freely access the code and pretrained models for study and customization.
In short, through single-stage detection, an efficient network architecture, multi-scale strategies, and data augmentation, YOLOv5 substantially improves object-detection performance. It has broad application prospects in computer vision, autonomous driving, and security surveillance, offering a practical solution for real-time detection tasks.
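To make the output format concrete, the sketch below decodes one row of a YOLOv5 prediction tensor. It assumes the common 85-column layout (4 box values, 1 objectness score, 80 class probabilities); the array and numbers here are illustrative, not output from the board.

```python
import numpy as np

# One synthetic YOLOv5 prediction row: [cx, cy, w, h, objectness, 80 class probs]
row = np.zeros(85, dtype=np.float32)
row[:4] = [0.5, 0.5, 0.2, 0.3]   # box center and size, normalized to [0, 1]
row[4] = 0.9                      # objectness confidence
row[5 + 16] = 0.8                 # class 16 ("dog" in COCO ordering)

cx, cy, w, h = row[:4]
class_id = int(np.argmax(row[5:]))
score = row[4] * row[5 + class_id]  # final score = objectness x class probability
print(f"class={class_id}, score={score:.2f}, box=({cx}, {cy}, {w}, {h})")
```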
For details on the YOLOv5 model, see ultralytics/yolov5.
Model Detection Demo
- Download the demo archive and unzip it; the path is 01、程序源码/05、AI例程源码/07、yolov5n_weston/atk_yolov5_weston_demo.zip;
- The demo is written in Python, so transfer the extracted Python program, model, and related files to the development board;
- Change into the demo directory and run the following in a terminal:

```
cd atk_yolov5_weston_demo/
python3 atk_yolov5_weston.py -m yolov5n-uint.nb
```

- Place a target object in front of the camera; the recognition results then appear on the screen, including the bounding box, inference time, and confidence.

Dynamic Recognition
This section walks through the main flow of capturing live frames from a USB camera and running inference on them to recognize objects.
Flowchart

Code
In a terminal, run touch object_yolov5_camera.py to create a new Python source file, then add the following code:
```python
import argparse

import cv2

from yolov5_pp import NeuralNetwork

# COCO's 80 object classes, in model output order
CLASS_NAMES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
    "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
]

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_file", required=True, help="Path to YOLOv5 model file")
    args = parser.parse_args()

    nn = NeuralNetwork(
        model_file=args.model_file,
        score_threshold=0.45,
        iou_threshold=0.5
    )

    cap = cv2.VideoCapture("/dev/video7")  # USB camera node on this board
    if not cap.isOpened():
        print("Failed to open the camera")
        return

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Resize to the model's input size and convert BGR (OpenCV) to RGB
            input_img = cv2.resize(frame, (nn.width, nn.height))
            input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
            nn.launch_inference(input_img)
            detections = nn.get_results()
            for det in detections:
                # Normalized top-left corner, size, score, and class index
                x, y, w, h, score, cls_id = det
                x1, y1 = int(x * frame.shape[1]), int(y * frame.shape[0])
                x2, y2 = int((x + w) * frame.shape[1]), int((y + h) * frame.shape[0])
                class_name = CLASS_NAMES[int(cls_id)] if int(cls_id) < len(CLASS_NAMES) else str(cls_id)
                label = f"{class_name}: {score:.2f}"
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, label, (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow("YOLOv5 Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()
```
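Note that /dev/video7 is the node the USB camera enumerated as on the board used here; on other setups it may differ. A minimal probe sketch, assuming standard /dev/videoN naming, to find a node that actually delivers frames:

```python
import cv2

# Try /dev/video0 .. /dev/video9 and report which nodes deliver frames
for idx in range(10):
    cap = cv2.VideoCapture(f"/dev/video{idx}")
    if cap.isOpened():
        ret, _ = cap.read()
        print(f"/dev/video{idx}: {'frames OK' if ret else 'opens but no frames'}")
    cap.release()
```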
Then run touch yolov5_pp.py in a terminal to create another Python source file, and add the following code:
```python
import numpy as np

from stai_mpu import stai_mpu_network

class NeuralNetwork:
    def __init__(self, model_file, score_threshold=0.45, iou_threshold=0.5):
        """Load the YOLOv5 model via the stai_mpu unified API."""
        self.model = stai_mpu_network(model_path=model_file)
        self.score_thres = score_threshold
        self.iou_thres = iou_threshold
        input_info = self.model.get_input_infos()[0]
        self.input_shape = input_info.get_shape()  # NHWC: [1, H, W, C]
        self.height, self.width = self.input_shape[1], self.input_shape[2]
        # Dequantization parameters of this quantized model's output tensor
        self.output_scale = 0.005922
        self.output_zero_point = 3

    def get_img_size(self):
        """Return the model input size as (width, height, channels)."""
        return (self.width, self.height, self.input_shape[3])

    def launch_inference(self, img):
        """Run one forward pass on an RGB image already resized to the input size."""
        input_data = np.expand_dims(img, axis=0)  # add the batch dimension
        self.model.set_input(0, input_data)
        self.model.run()

    def get_results(self):
        """Fetch and post-process the detections of the last inference."""
        raw_output = self.model.get_output(0)[0]
        # Dequantize: one row per candidate, [cx, cy, w, h, objectness, class probs...]
        pred = (raw_output - self.output_zero_point) * self.output_scale
        boxes = pred[:, :4]
        conf = pred[:, 4]
        cls_probs = pred[:, 5:]
        class_ids = np.argmax(cls_probs, axis=1)
        scores = conf * cls_probs[np.arange(len(cls_probs)), class_ids]
        mask = scores > self.score_thres
        if not np.any(mask):
            return []
        boxes, scores, class_ids = boxes[mask], scores[mask], class_ids[mask]
        boxes[:, :2] -= boxes[:, 2:] / 2  # center (cx, cy) -> top-left (x, y)
        keep = self.nms(boxes, scores)
        return np.column_stack([boxes[keep], scores[keep], class_ids[keep]]).tolist()

    def nms(self, boxes, scores):
        """Simplified non-maximum suppression over [x, y, w, h] boxes."""
        x1, y1 = boxes[:, 0], boxes[:, 1]
        x2, y2 = boxes[:, 0] + boxes[:, 2], boxes[:, 1] + boxes[:, 3]
        areas = (x2 - x1) * (y2 - y1)
        order = scores.argsort()[::-1]  # highest score first
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # Intersection of the current best box with all remaining boxes
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w, h = np.maximum(0, xx2 - xx1), np.maximum(0, yy2 - yy1)
            iou = (w * h) / (areas[i] + areas[order[1:]] - w * h)
            order = order[np.where(iou <= self.iou_thres)[0] + 1]
        return keep
```
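The nms() helper can be sanity-checked on the board with synthetic boxes before wiring up the camera; a minimal sketch (the box values are made up, and the instance is created without loading a model just to exercise this one method):

```python
import numpy as np

from yolov5_pp import NeuralNetwork

# Build an instance without loading a model, only to call nms()
nn = NeuralNetwork.__new__(NeuralNetwork)
nn.iou_thres = 0.5

# Two heavily overlapping boxes plus one separate box, format [x, y, w, h]
boxes = np.array([[0.10, 0.10, 0.30, 0.30],
                  [0.12, 0.11, 0.30, 0.30],
                  [0.70, 0.70, 0.20, 0.20]])
scores = np.array([0.90, 0.60, 0.80])

print([int(i) for i in nn.nms(boxes, scores)])  # expected: [0, 2], duplicate suppressed
```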
- Connect a USB camera, then run python3 object_yolov5_camera.py -m yolov5n-uint.nb in a terminal;
- The script opens the camera and performs frame capture, object-recognition inference, and on-screen display of the results.
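As a rough performance check, the inference call in object_yolov5_camera.py can be wrapped with a timer; a minimal sketch of the idea, assuming the nn, input_img, and frame variables from the script above:

```python
import time

# Time one inference plus post-processing and overlay it on the frame
t0 = time.perf_counter()
nn.launch_inference(input_img)
detections = nn.get_results()
elapsed_ms = (time.perf_counter() - t0) * 1000
cv2.putText(frame, f"{elapsed_ms:.1f} ms", (10, 20),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
```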
Results

See the video at the top of this post for the live demo.
Static Recognition
This section walks through the main flow of recognizing objects in a local image via on-board inference.
Flowchart

Code
In a terminal, run touch yolov5_inference.py to create a new Python source file, then add the following code. The code reuses the yolov5_pp.py module created earlier for dynamic recognition.
```python
import argparse
import os

import cv2

from yolov5_pp import NeuralNetwork

# COCO's 80 object classes, in model output order
CLASS_NAMES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
    "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
]

def process_image(model, image_path, input_width, input_height):
    frame = cv2.imread(image_path)
    if frame is None:
        print(f"Error: failed to read image {image_path}")
        return None, []
    original_height, original_width = frame.shape[:2]
    # Resize to the model's input size and convert BGR (OpenCV) to RGB
    input_img = cv2.resize(frame, (input_width, input_height))
    input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
    model.launch_inference(input_img)
    detections = model.get_results()
    results = []
    for det in detections:
        x, y, w, h, score, cls_id = det
        # Map normalized coordinates back to the original image size
        x_abs = int(x * original_width)
        y_abs = int(y * original_height)
        w_abs = int(w * original_width)
        h_abs = int(h * original_height)
        class_name = CLASS_NAMES[int(cls_id)] if int(cls_id) < len(CLASS_NAMES) else str(cls_id)
        results.append({
            "class": class_name,
            "confidence": float(score),
            "bbox": [x_abs, y_abs, w_abs, h_abs]
        })
        cv2.rectangle(frame, (x_abs, y_abs), (x_abs + w_abs, y_abs + h_abs), (0, 255, 0), 2)
        label = f"{class_name}: {score:.2f}"
        cv2.putText(frame, label, (x_abs, y_abs - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    return frame, results

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_file", required=True, help="Path to YOLOv5 model file")
    parser.add_argument("-p", "--image_path", required=True, help="Path to the image to detect")
    args = parser.parse_args()
    if not os.path.exists(args.model_file):
        print(f"Error: model file {args.model_file} does not exist")
        return
    if not os.path.exists(args.image_path):
        print(f"Error: image file {args.image_path} does not exist")
        return
    nn = NeuralNetwork(
        model_file=args.model_file,
        score_threshold=0.45,
        iou_threshold=0.5
    )
    input_width, input_height, _ = nn.get_img_size()
    result_img, detections = process_image(nn, args.image_path, input_width, input_height)
    if result_img is not None:
        # Save the annotated image next to the input image
        dir_name, file_name = os.path.split(args.image_path)
        name, ext = os.path.splitext(file_name)
        output_path = os.path.join(dir_name, f"{name}_detected{ext}")
        cv2.imwrite(output_path, result_img)
        print("\n===== Detection results =====")
        print(f"Input image:  {args.image_path}")
        print(f"Output image: {output_path}")
        print(f"Detected {len(detections)} object(s):\n")
        for i, det in enumerate(detections, 1):
            print(f"{i}. class: {det['class']}")
            print(f"   confidence: {det['confidence']:.4f}")
            print(f"   position: x={det['bbox'][0]}, y={det['bbox'][1]}")
            print(f"   size: w={det['bbox'][2]}, h={det['bbox'][3]}\n")
        print("=============================")

if __name__ == "__main__":
    main()
```
- Run python3 yolov5_inference.py -m ./model/yolov5n-uint.nb -p ./model/test3.jpg in a terminal;
- The script loads the model and the target image, runs the object-recognition inference, then saves and prints the results.
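Since process_image() is self-contained, it is easy to reuse for a whole folder of test images; a minimal batch sketch (the images/ directory and file layout here are hypothetical):

```python
import os

import cv2

from yolov5_pp import NeuralNetwork
from yolov5_inference import process_image

nn = NeuralNetwork(model_file="yolov5n-uint.nb")
w, h, _ = nn.get_img_size()

# Run inference on every image in images/ and save annotated copies
for name in sorted(os.listdir("images")):
    if not name.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    path = os.path.join("images", name)
    result, dets = process_image(nn, path, w, h)
    if result is not None:
        cv2.imwrite(os.path.join("images", f"det_{name}"), result)
        print(f"{name}: {len(dets)} object(s)")
```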
Results
After the command finishes, the annotated result image is generated:

The terminal also prints the detection details:

See the video at the bottom of this post for details.
More inference tests in additional scenes:

Summary
This article presented an object-recognition project on the ALIENTEK STM32MP257 development board based on the YOLOv5n model, covering an introduction to YOLOv5, testing of the official demo, dynamic recognition of camera frames, and static on-board image recognition. It offers a reference for AI-related development, product design, and applications on this board.