"""Classify a sample COCO image with a pretrained MobileNetV2 checkpoint.

The script downloads one image from the COCO val2017 set, runs it through
the ``google/mobilenet_v2_1.0_224`` image-classification model, and prints
the label of the highest-scoring class.
"""

import io

import gradio as gr
import numpy
import requests
import torch
from PIL import Image
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    DetrForSegmentation,
    DetrImageProcessor,
)
from transformers.models.detr.feature_extraction_detr import rgb_to_id

url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# Bound the request so a stalled server cannot hang the script, and fail
# loudly on an HTTP error instead of handing an error page to PIL.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Decode from the fully downloaded body rather than the raw socket stream:
# the body has any content-encoding already undone and the connection is
# released as soon as the download finishes.
image = Image.open(io.BytesIO(response.content))

# --- Disabled experiment: DETR panoptic segmentation -----------------------
# NOTE(review): the checkpoint id below looks like a method name rather than
# a published model id -- confirm the intended checkpoint before re-enabling.
#
# feature_extractor = DetrImageProcessor.from_pretrained("facebook/post_process_panoptic_segmentation")
# model = DetrForSegmentation.from_pretrained("facebook/post_process_panoptic_segmentation")
#
# # prepare image for the model
# inputs = feature_extractor(images=image, return_tensors="pt")
#
# # forward pass
# outputs = model(**inputs)
#
# # use the `post_process_panoptic` method of `DetrFeatureExtractor` to convert to COCO format
# processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0)
# result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0]
#
# # the segmentation is stored in a special-format png
# panoptic_seg = Image.open(io.BytesIO(result["png_string"]))
# panoptic_seg = numpy.array(panoptic_seg, dtype=numpy.uint8)
# # retrieve the ids corresponding to each mask
# panoptic_seg_id = rgb_to_id(panoptic_seg)
# ---------------------------------------------------------------------------

# Single source of truth so the processor and the model always come from the
# same checkpoint.
checkpoint = "google/mobilenet_v2_1.0_224"
preprocessor = AutoImageProcessor.from_pretrained(checkpoint)
model = AutoModelForImageClassification.from_pretrained(checkpoint)

inputs = preprocessor(images=image, return_tensors="pt")

# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])

# gr.Image(image).launch()