遇到一個奇怪的狀況:訓練 YOLOv8 時,在讀取幾張圖片之後發生錯誤(錯誤訊息與完整 traceback 見下方)。
原因是 DataLoader 開啟的多核心(多個 worker 行程)運算,超過了機器本身擁有的資源。
若不指定 workers,預設值為 8;下方範例則明確指定 workers=16。錯誤訊息本身容易讓人誤以為是圖片存取出了問題:
DataLoader worker (pid(s) 2984, 5496, 5092) exited unexpectedly
def main():
    """Train the pretrained YOLOv8-m checkpoint on the custom dataset.

    Loads ``yolov8m.pt`` and calls ``train`` with ``data_custom.yaml`` as the
    ``data`` argument. The value returned by ``model.train`` is discarded.
    """
    # Load the model.
    model = YOLO('yolov8m.pt')

    # Training.
    # Omitting `workers` makes Ultralytics fall back to its default of 8.
    # NOTE: with workers=16 this run aborted during validation with
    # "RuntimeError: DataLoader worker (pid(s) ...) exited unexpectedly";
    # lower `workers` if the machine cannot sustain that many loader processes.
    model.train(data='data_custom.yaml', imgsz=736, epochs=30000, batch=1, workers=16, name='yolov8m', patience=0)
Image sizes 736 train, 736 val
Using 16 dataloader workers
Logging results to runs\detect\yolov8m18
Starting training for 30000 epochs...
Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size
1/30000 0.902G 2.325 7.04 1.855 2 736: 100%|██████████| 21/21 [00:45<00:00, 2.
Class Images Instances Box(P R mAP50 mAP50-95): 33%|███▎ | 2/6 [00:05<0
Traceback (most recent call last):
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\torch\utils\data\dataloader.py", line 1243, in _try_get_data
data = self._data_queue.get(timeout=timeout)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\queue.py", line 179, in get
raise Empty
_queue.Empty
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\waferData\degas\train.py", line 13, in <module>
main()
File "C:\waferData\degas\train.py", line 10, in main
model.train(data='data_custom.yaml', imgsz=736, epochs=30000, batch=1, workers=16, name='yolov8m', patience=0)
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\ultralytics\engine\model.py", line 802, in train
self.trainer.train()
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\ultralytics\engine\trainer.py", line 207, in train
self._do_train(world_size)
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\ultralytics\engine\trainer.py", line 437, in _do_train
self.metrics, self.fitness = self.validate()
^^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\ultralytics\engine\trainer.py", line 600, in validate
metrics = self.validator(self)
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\ultralytics\engine\validator.py", line 171, in __call__
for batch_i, batch in enumerate(bar):
^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\tqdm\std.py", line 1181, in __iter__
for obj in iterable:
^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\ultralytics\data\build.py", line 48, in __iter__
yield next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\torch\utils\data\dataloader.py", line 701, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\torch\utils\data\dataloader.py", line 1448, in _next_data
idx, data = self._get_data()
^^^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\torch\utils\data\dataloader.py", line 1402, in _get_data
success, data = self._try_get_data()
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\me123\anaconda3\envs\yolov8\Lib\site-packages\torch\utils\data\dataloader.py", line 1256, in _try_get_data
raise RuntimeError(
RuntimeError: DataLoader worker (pid(s) 2984, 5496, 5092) exited unexpectedly