Ultralytics YOLOv5 ??, AGPL-3.0 license
"""
Run YOLOv5 detection inference on images, videos, directories, globs, YouTube, webcam, streams, etc.
Usage - sources:
$ python detect.py --weights yolov5s.pt --source 0 # webcam
img.jpg # image
vid.mp4 # video
screen # screenshot
path/ # directory
list.txt # list of images
list.streams # list of streams
'path/*.jpg' # glob
'https://youtu.be/LNwODJXcvt4' # YouTube
'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP stream
Usage - formats:
$ python detect.py --weights yolov5s.pt # PyTorch
yolov5s.torchscript # TorchScript
yolov5s.onnx # ONNX Runtime or OpenCV DNN with --dnn
yolov5s_openvino_model # OpenVINO
yolov5s.engine # TensorRT
yolov5s.mlmodel # CoreML (macOS-only)
yolov5s_saved_model # TensorFlow SavedModel
yolov5s.pb # TensorFlow GraphDef
yolov5s.tflite # TensorFlow Lite
yolov5s_edgetpu.tflite # TensorFlow Edge TPU
yolov5s_paddle_model # PaddlePaddle
"""
-- coding: utf-8 --
import argparse
import csv
import os
import platform
import sys
from pathlib import Path
from playsound import playsound
import torch
FILE = Path(file).resolve()
ROOT = FILE.parents[0] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from ultralytics.utils.plotting import Annotator, colors, save_one_box
from models.common import DetectMultiBackend
from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
from utils.general import (
LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh
)
from utils.torch_utils import select_device, smart_inference_mode
@smart_inference_mode()
def run(
weights=ROOT / "best.pt", # model path or triton URL
source=ROOT / "data/images", # file/dir/URL/glob/screen/0(webcam)
data=ROOT / "data/coco128.yaml", # dataset.yaml path
imgsz=(320, 320), # inference size (height, width)
conf_thres=0.25, # confidence threshold
iou_thres=0.45, # NMS IOU threshold
max_det=1000, # maximum detections per image
device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu
view_img=False, # show results
save_txt=False, # save results to *.txt
save_csv=False, # save results in CSV format
save_conf=False, # save confidences in --save-txt labels
save_crop=False, # save cropped prediction boxes
nosave=False, # do not save images/videos
classes=None, # filter by class: --class 0, or --class 0 2 3
agnostic_nms=False, # class-agnostic NMS
augment=False, # augmented inference
visualize=False, # visualize features
update=False, # update all models
project=ROOT / "runs/detect", # save results to project/name
name="exp", # save results to project/name
exist_ok=False, # existing project/name ok, do not increment
line_thickness=3, # bounding box thickness (pixels)
hide_labels=False, # hide labels
hide_conf=False, # hide confidences
half=False, # use FP16 half-precision inference
dnn=False, # use OpenCV DNN for ONNX inference
vid_stride=2, # video frame-rate stride
):
"""
Runs YOLOv5 detection inference on various sources like images, videos, directories, streams, etc.
Args:
weights (str | Path): Path to the model weights file or a Triton URL. Default is 'yolov5s.pt'.
source (str | Path): Input source, which can be a file, directory, URL, glob pattern, screen capture, or webcam index.
Default is 'data/images'.
data (str | Path): Path to the dataset YAML file. Default is 'data/coco128.yaml'.
imgsz (tuple[int, int]): Inference image size as a tuple (height, width). Default is (640, 640).
conf_thres (float): Confidence threshold for detections. Default is 0.25.
iou_thres (float): Intersection Over Union (IOU) threshold for non-max suppression. Default is 0.45.
max_det (int): Maximum number of detections per image. Default is 1000.
device (str): CUDA device identifier (e.g., '0' or '0,1,2,3') or 'cpu'. Default is an empty string, which
uses the best available device.
view_img (bool): If True, display inference results using OpenCV. Default is False.
save_txt (bool): If True, save results in a text file. Default is False.
save_csv (bool): If True, save results in a CSV file. Default is False.
save_conf (bool): If True, include confidence scores in the saved results. Default is False.
save_crop (bool): If True, save cropped prediction boxes. Default is False.
nosave (bool): If True, do not save inference images or videos. Default is False.
classes (list[int]): List of class indices to filter detections by. Default is None.
agnostic_nms (bool): If True, perform class-agnostic non-max suppression. Default is False.
augment (bool): If True, use augmented inference. Default is False.
visualize (bool): If True, visualize feature maps. Default is False.
update (bool): If True, update all models' weights. Default is False.
project (str | Path): Directory to save results. Default is 'runs/detect'.
name (str): Name of the current experiment; used to create a subdirectory within 'project'. Default is 'exp'.
exist_ok (bool): If True, existing directories with the same name are reused instead of being incremented. Default is
False.
line_thickness (int): Thickness of bounding box lines in pixels. Default is 3.
hide_labels (bool): If True, do not display labels on bounding boxes. Default is False.
hide_conf (bool): If True, do not display confidence scores on bounding boxes. Default is False.
half (bool): If True, use FP16 half-precision inference. Default is False.
dnn (bool): If True, use OpenCV DNN backend for ONNX inference. Default is False.
vid_stride (int): Stride for processing video frames, to skip frames between processing. Default is 1.
Returns:
None
Examples:
```python
from ultralytics import run
# Run inference on an image
run(source='data/images/example.jpg', weights='yolov5s.pt', device='0')
# Run inference on a video with specific confidence threshold
run(source='data/videos/example.mp4', weights='yolov5s.pt', conf_thres=0.4, device='0')
```
"""
source = str(source)
save_img = not nosave and not source.endswith(".txt") # save inference images
is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
is_url = source.lower().startswith(("rtsp://", "rtmp://", "http://", "https://"))
webcam = source.isnumeric() or source.endswith(".streams") or (is_url and not is_file)
screenshot = source.lower().startswith("screen")
if is_url and is_file:
source = check_file(source) # download
# Directories
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
(save_dir / "labels" if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir
# Load model
device = select_device(device)
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
stride, names, pt = model.stride, model.names, model.pt
model.names = names if isinstance(names, list) else names.names if hasattr(names, "names") else []
# Dataloader
dataset = LoadImages(source, img_size=imgsz, stride=int(stride), auto=webcam or screenshot)
dataset = LoadScreenshots(source, img_size=imgsz, stride=int(stride), auto=webcam or screenshot) if screenshot else dataset
dataset = LoadStreams(str(source), img_size=imgsz, stride=int(stride), auto=webcam or screenshot) if webcam else dataset
# Run inference
if pt and not update:
if visualize:
model.visualize = True
results, times = Profile(model.run, args=(dataset,), kwargs={'conf_thres': conf_thres, 'iou_thres': iou_thres, 'max_det': max_det})
else:
if visualize:
raise TypeError('Visualizing is not supported for this model')
results, times = Profile(model.run, args=(dataset,), kwargs={'conf_thres': conf_thres, 'iou_thres': iou_thres, 'max_det': max_det})
if update:
strip_optimizer(weights) # update model (to fix SourceChangeWarning)
# Process results
for path, out, imgsz in zip(dataset.paths, results.xywh, dataset.img_size):
labels = []
for *box, conf, cls in out.tolist():
if save_txt or save_img or save_crop: # Write to file
xywh = xyxy2xywh(box) # xyxy to xywh
xywh[2:] *= imgsz # normalized to pixel space
line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format
labels.append(f"{cls} {xywh[0]:g} {xywh[1]:g} {xywh[2]:g} {xywh[3]:g} {conf:g}\n")
if save_crop:
save_one_box(xywh, Path(path).with_suffix('.jpg'), None, line_thickness=line_thickness)
if save_img:
save_one_box(xywh, Path(str(save_dir) + Path(path).name), None, line_thickness=line_thickness)
if save_conf:
with open(Path(save_dir) / 'labels' / Path(path).stem + '.txt', 'a') as file:
file.write(labels)
if view_img: # Add bbox to image
save_one_box(xywh, path, names[int(cls)], line_thickness=line_thickness, hide_labels=hide_labels, hide_conf=hide_conf)
if view_img:
Annotator(path, line_thickness=line_thickness).save()
# Display results
for path, img, det, im0s in zip(dataset.paths, dataset.imgs, results.xywh, dataset.im0s):
for *box, conf, cls in det.tolist():
label = None if hide_labels else (names[int(cls)], conf) if hide_conf else (f'{names[int(cls)]} {conf:.2f}')
plot_one_box(box, img, color=colors(int(cls)), label=label, line_thickness=line_thickness)
cv2.imshow(imgsz, im0s)
# Save results (image with detections)
if save_img:
LOGGER.info(f'Saving results to {str(save_dir)}')
if save_txt or save_csv:
LOGGER.info('Saving labels')
with open(save_dir / 'labels.txt', 'w') as file:
for img, lines in zip(dataset.imgs, results.xywh):
file.write(f"{img}: {lines}\n")
if save_csv:
LOGGER.info('Converting labels to CSV format')
with open(save_dir / 'labels.csv', 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(['image_id', 'prediction'])
for img, lines in zip(dataset.imgs, results.xywh):
writer.writerow([img, lines])
# Play sound
if str(weights).endswith('.tflite'):
LOGGER.info('Detect sound')
playsound('gun.wav')
# Print results
for i, (path, out, imgsz, im0s) in enumerate(zip(dataset.paths, results.xywh, dataset.img_size, dataset.im0s)):
LOGGER.info(f'{i + 1}/{len(dataset)}: {path.stem} ({im0s.shape[1]}x{im0s.shape[0]}) {len(out)} detections, {times[i]:.3f} s')
def parse_opt():
"""
Parses command-line arguments for YOLOv5 detection, setting inference options and model configurations.
Args:
--weights (str | list[str], optional): Model path or Triton URL. Defaults to ROOT / 'yolov5s.pt'.
--source (str, optional): File/dir/URL/glob/screen/0(webcam). Defaults to ROOT / 'data/images'.
--data (str, optional): Dataset YAML path. Provides dataset configuration information.
--imgsz (list[int], optional): Inference size (height, width). Defaults to [640].
--conf-thres (float, optional): Confidence threshold. Defaults to 0.25.
--iou-thres (float, optional): NMS IoU threshold. Defaults to 0.45.
--max-det (int, optional): Maximum number of detections per image. Defaults to 1000.
--device (str, optional): CUDA device, i.e., '0' or '0,1,2,3' or 'cpu'. Defaults to "".
--view-img (bool, optional): Flag to display results. Defaults to False.
--save-txt (bool, optional): Flag to save results to *.txt files. Defaults to False.
--save-csv (bool, optional): Flag to save results in CSV format. Defaults to False.
--save-conf (bool, optional): Flag to save confidences in labels saved via --save-txt. Defaults to False.
--save-crop (bool, optional): Flag to save cropped prediction boxes. Defaults to False.
--nosave (bool, optional): Flag to prevent saving images/videos. Defaults to False.
--classes (list[int], optional): List of classes to filter results by, e.g., '--classes 0 2 3'. Defaults to None.
--agnostic-nms (bool, optional): Flag for class-agnostic NMS. Defaults to False.
--augment (bool, optional): Flag for augmented inference. Defaults to False.
--visualize (bool, optional): Flag for visualizing features. Defaults to False.
--update (bool, optional): Flag to update all models in the model directory. Defaults to False.
--project (str, optional): Directory to save results. Defaults to ROOT / 'runs/detect'.
--name (str, optional): Sub-directory name for saving results within --project. Defaults to 'exp'.
--exist-ok (bool, optional): Flag to allow overwriting if the project/name already exists. Defaults to False.
--line-thickness (int, optional): Thickness (in pixels) of bounding boxes. Defaults to 3.
--hide-labels (bool, optional): Flag to hide labels in the output. Defaults to False.
--hide-conf (bool, optional): Flag to hide confidences in the output. Defaults to False.
--half (bool, optional): Flag to use FP16 half-precision inference. Defaults to False.
--dnn (bool, optional): Flag to use OpenCV DNN for ONNX inference. Defaults to False.
--vid-stride (int, optional): Video frame-rate stride, determining the number of frames to skip in between consecutive frames. Defaults to 1.
Returns:
argparse.Namespace: Parsed command-line arguments as an argparse.Namespace object.
Example:
```python
from ultralytics import YOLOv5
args = YOLOv5.parse_opt()
```
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model path or triton URL")
parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="(optional) dataset.yaml path")
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
parser.add_argument("--conf-thres", type=float, default=0.25, help="confidence threshold")
parser.add_argument("--iou-thres", type=float, default=0.45, help="NMS IoU threshold")
parser.add_argument("--max-det", type=int, default=1000, help="maximum detections per image")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--view-img", action="store_true", help="show results")
parser.add_argument("--save-txt", action="store_true", help="save results to *.txt")
parser.add_argument("--save-csv", action="store_true", help="save results in CSV format")
parser.add_argument("--save-conf", action="store_true", help="save confidences in --save-txt labels")
parser.add_argument("--save-crop", action="store_true", help="save cropped prediction boxes")
parser.add_argument("--nosave", action="store_true", help="do not save images/videos")
parser.add_argument("--classes", nargs="+", type=int, help="filter by class: --classes 0, or --classes 0 2 3")
parser.add_argument("--agnostic-nms", action="store_true", help="class-agnostic NMS")
parser.add_argument("--augment", action="store_true", help="augmented inference")
parser.add_argument("--visualize", action="store_true", help="visualize features")
parser.add_argument("--update", action="store_true", help="update all models")
parser.add_argument("--project", default=ROOT / "runs/detect", help="save results to project/name")
parser.add_argument("--name", default="exp", help="save results to project/name")
parser.add_argument("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
parser.add_argument("--line-thickness", default=3, type=int, help="bounding box thickness (pixels)")
parser.add_argument("--hide-labels", default=False, action="store_true", help="hide labels")
parser.add_argument("--hide-conf", default=False, action="store_true", help="hide confidences")
parser.add_argument("--half", action="store_true", help="use FP16 half-precision inference")
parser.add_argument("--dnn", action="store_true", help="use OpenCV DNN for ONNX inference")
parser.add_argument("--vid-stride", type=int, default=1, help="video frame-rate stride")
opt = parser.parse_args()
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
print_args(vars(opt))
return opt
def main(opt):
"""
Executes YOLOv5 model inference based on provided command-line arguments, validating dependencies before running.
Args:
opt (argparse.Namespace): Command-line arguments for YOLOv5 detection. See function `parse_opt` for details.
Returns:
None
Note:
This function performs essential pre-execution checks and initiates the YOLOv5 detection process based on user-specified options.
Refer to the usage guide and examples for more information about different sources and formats at:
https://github.com/ultralytics/ultralytics
Example usage:
```python
if __name__ == "__main__":
opt = parse_opt()
main(opt)
```
"""
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
run(**vars(opt))
if name == "main":
opt = parse_opt()
main(opt)
이게 젯슨나노에서 yolov5를 통해 객체탐지를 하고 실시간으로 웹캠으로 보여지게 하는데 객체 탐지되면 audio.mp3를 통해 소리를 나게 하였습니다 근데 에러코드로 user@user:~/yolov5$ python3.8 detect.py --source 0 --weights best.pt --imgsz 320
detect: weights=['best.pt'], source=0, data=data/coco128.yaml, imgsz=[320, 320],txt=False, save_csv=False, save_conf=False, save_crop=False, nosave=False, classject=runs/detect, name=exp, exist_ok=False, line_thickness=3, hide_labels=False,
YOLOv5 v7.0-339-g150a1a31 Python-3.8.12 torch-2.3.1 CPU
Fusing layers...
YOLOv5s summary: 157 layers, 7018216 parameters, 0 gradients, 15.8 GFLOPs
Traceback (most recent call last):
File "detect.py", line 335, in
main(opt)
File "detect.py", line 330, in main
run(**vars(opt))
File "/home/user/.local/lib/python3.8/site-packages/torch/utils/_contextlib.py
return func(*args, **kwargs)
File "detect.py", line 158, in run
dataset = LoadImages(source, img_size=imgsz, stride=int(stride), auto=webcam
File "/home/user/yolov5/utils/dataloaders.py", line 335, in init
raise FileNotFoundError(f"{p} does not exist")
FileNotFoundError: /home/user/yolov5/0 does not exist
이렇게 뜨는데 어떻게 해결해야되나요 mp3코드를 추가하기전까지는 웹캠이 작동되었습니다