autodistill
autodistill copied to clipboard
Corrupt JPEG data
Search before asking
- [X] I have searched the Autodistill issues and found no similar bug report.
Bug
Hi
I am following this tutorial: https://medium.com/@corpy.ai.lab/autodistill-automating-dataset-labeling-for-efficient-model-training-6aed5f63bea
and I am facing an error with the converted JPG images. The error is raised from the split_data function in helpers.py.
When I am done running this snippet:
self.base_model.label(
input_folder=config.image_dir,
output_folder=config.dataset_dir,
extension=".png",
)
I run the following snippet:
sv.DetectionDataset.from_yolo(
images_directory_path=config.image_path,
annotations_directory_path=config.annotation_path,
data_yaml_path=config.data_yaml_path,
)
I get this error:
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: 1 extraneous bytes before marker 0xd9
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: premature end of data segment
Corrupt JPEG data: 3 extraneous bytes before marker 0xd9
If I use the original PNG images, it works fine. I assume the conversion is done to compress the images? As far as I know, YOLO supports PNG images.
Environment
- OS: macos 14.1
- autodistill: 0.1.15
- supervision: 0.16.0
- Python: 3.8.13
Minimal Reproducible Example
import supervision as sv
from tqdm import tqdm
from pathlib import Path
from typing import Union
from autodistill_yolov8 import YOLOv8
from omegaconf import OmegaConf, DictConfig
from autodistill.detection import CaptionOntology
from autodistill_grounded_sam import GroundedSAM
from autodistill_grounding_dino import GroundingDINO
class AUTODISTILLATION:
    """Frame extraction, auto-labelling and YOLOv8 training driven by one config.

    The base (labelling) model is ``GroundingDINO`` built from a
    ``CaptionOntology`` over ``config.labels``; the target model is ``YOLOv8``
    built from ``config.target_model``.
    """

    def __init__(self, config: Union[str, DictConfig]) -> None:
        """Load the configuration and construct the base and target models.

        Args:
            config: An already-loaded ``DictConfig``, or anything
                ``OmegaConf.load`` accepts (e.g. a path to a YAML file).
        """
        if isinstance(config, DictConfig):
            self.config = config
        else:
            self.config = OmegaConf.load(config)
        # NOTE: the attribute spelling "onotology" (sic) is kept so that any
        # existing code reading it keeps working.
        self.onotology = CaptionOntology(self.config.labels)
        self.base_model = GroundingDINO(ontology=self.onotology)
        self.target_model = YOLOv8(self.config.target_model)

    @staticmethod
    def extract_frame(config: DictConfig) -> None:
        """Write frames of the training video(s) into ``config.image_dir`` as PNG.

        Frames are read with ``stride=config.frame_stride`` and saved as
        ``<video stem>-NNNNN.png``.

        Args:
            config: Must provide ``train_video_path`` (a single path, or an
                iterable of paths), ``image_dir`` and ``frame_stride``.
        """
        raw_paths = config.train_video_path
        # A single path is wrapped in a list; any other iterable of paths is
        # processed element by element.
        if isinstance(raw_paths, (str, Path)):
            video_paths = [raw_paths]
        else:
            video_paths = list(raw_paths)

        image_dir = config.image_dir
        frame_stride = config.frame_stride
        for video_path in tqdm(video_paths):
            video_path = Path(video_path)
            image_name_pattern = video_path.stem + "-{:05d}.png"
            with sv.ImageSink(
                target_dir_path=image_dir,
                image_name_pattern=image_name_pattern,
            ) as sink:
                for image in sv.get_video_frames_generator(
                    source_path=str(video_path), stride=frame_stride
                ):
                    sink.save_image(image=image)

    def label_dataset(self, config: DictConfig) -> None:
        """Label the images in ``config.image_dir`` with the base model.

        The labelled dataset is written to ``config.dataset_dir``.
        """
        print("[INFO] Start labelling")
        self.base_model.label(
            input_folder=config.image_dir,
            output_folder=config.dataset_dir,
            extension=".png",
        )

    def dataset_to_yolo(self, config: DictConfig):
        """Load the labelled YOLO-format dataset and return it.

        Despite the method name, this reads an existing dataset through
        ``sv.DetectionDataset.from_yolo``; it does not write anything itself.

        Returns:
            The object returned by ``sv.DetectionDataset.from_yolo``.
        """
        print("[INFO] Converting dataset to yolo format")
        return sv.DetectionDataset.from_yolo(
            images_directory_path=config.image_path,
            annotations_directory_path=config.annotation_path,
            data_yaml_path=config.data_yaml_path,
        )

    def make_dataset(self, config: DictConfig) -> None:
        """Extract frames if needed, then label them and load the dataset.

        Frame extraction only runs when ``config.image_dir`` is missing or
        empty, so existing frames are reused.
        """
        print("[INFO] Making dataset start ...")
        image_dir = Path(config.image_dir)
        # A missing directory counts as empty: iterdir() on it would raise
        # FileNotFoundError before any frame could be extracted.
        if not image_dir.exists() or not any(image_dir.iterdir()):
            self.extract_frame(config)
        self.label_dataset(config)
        self.dataset_to_yolo(config)

    def train(self, config: DictConfig) -> None:
        """Train the target model on ``config.data_yaml_path`` for ``config.epochs``."""
        print("[INFO] Training ....")
        self.target_model.train(
            config.data_yaml_path, epochs=config.epochs
        )

    def inference(self, source, conf):
        """Run the target model on ``source`` and return its prediction.

        Args:
            source: Input forwarded to ``target_model.predict``.
            conf: Second positional argument forwarded to ``predict``
                (presumably the confidence threshold — confirm against the
                target model's API).

        Returns:
            Whatever ``target_model.predict`` returns.
        """
        return self.target_model.predict(source, conf)
if __name__ == "__main__":
    # Reproduction entry point: build the pipeline from the local YAML config
    # and run dataset creation only.
    distillation = AUTODISTILLATION(config=OmegaConf.load("config.yaml"))
    distillation.make_dataset(config=distillation.config)
    # Training and inference are intentionally disabled for this reproduction:
    # distillation.train(config=distillation.config)
    # distillation.inference(source=distillation.config.test_video_path, conf=distillation.config.conf)
Additional
No response
Are you willing to submit a PR?
- [X] Yes I'd like to help by submitting a PR!
Hello! Thank you for creating this Issue. Is it possible for you to share an example image from your dataset that raised the "Corrupt JPEG data" warning?
@ashaheedq Do you still require assistance? If not, can we close this issue?