import os
import json
import hashlib
from io import BytesIO

import requests
import streamlit as st
from groq import Groq
from PIL import Image, UnidentifiedImageError, ExifTags

from final_captioner import generate_final_caption

# Streamlit page title
st.title("PicSamvaad : Image Conversational Chatbot")

# Load configuration
working_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(working_dir, "config.json")) as config_file:
    config_data = json.load(config_file)
GROQ_API_KEY = config_data["GROQ_API_KEY"]

# Save the API key to an environment variable so the Groq client can pick it up
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

client = Groq()

# Sidebar for image upload and URL input
with st.sidebar:
    st.header("Upload Image or Enter URL")
    uploaded_file = st.file_uploader(
        "Upload an image to chat...", type=["jpg", "jpeg", "png"]
    )
    url = st.text_input("Or enter a valid image URL...")

image = None
error_message = None


def correct_image_orientation(img):
    """Rotate the image according to its EXIF orientation tag, if present."""
    try:
        for orientation in ExifTags.TAGS.keys():
            if ExifTags.TAGS[orientation] == "Orientation":
                break
        exif = img._getexif()
        if exif is not None:
            orientation = exif[orientation]
            if orientation == 3:
                img = img.rotate(180, expand=True)
            elif orientation == 6:
                img = img.rotate(270, expand=True)
            elif orientation == 8:
                img = img.rotate(90, expand=True)
    except (AttributeError, KeyError, IndexError):
        pass
    return img


def get_image_hash(image):
    """Generate a unique hash for the image so a new upload can be detected."""
    img_bytes = image.tobytes()
    return hashlib.md5(img_bytes).hexdigest()


# Check whether a new image or URL has been provided and reset the chat history
if "last_uploaded_hash" not in st.session_state:
    st.session_state.last_uploaded_hash = None

if uploaded_file is not None:
    image = Image.open(uploaded_file)
    image_hash = get_image_hash(image)

    if st.session_state.last_uploaded_hash != image_hash:
        st.session_state.chat_history = []  # Clear chat history
        st.session_state.last_uploaded_hash = image_hash  # Update last uploaded hash

    image = correct_image_orientation(image)
    st.image(image, caption="Uploaded Image.", use_column_width=True)

elif url:
    try:
        response = requests.get(url)
        response.raise_for_status()  # Check that the request was successful
        image = Image.open(BytesIO(response.content))
        image_hash = get_image_hash(image)

        if st.session_state.last_uploaded_hash != image_hash:
            st.session_state.chat_history = []  # Clear chat history
            st.session_state.last_uploaded_hash = image_hash  # Update last uploaded hash

        image = correct_image_orientation(image)
        st.image(image, caption="Image from URL.", use_column_width=True)
    except (requests.exceptions.RequestException, UnidentifiedImageError):
        image = None
        error_message = (
            "Error: The provided URL is invalid or the image could not be loaded. "
            "Some image URLs don't work; we suggest downloading the image and uploading it instead."
        )

caption = ""
if image is not None:
    caption += generate_final_caption(image)
    st.write("ChatBot : " + caption)

# Display error message, if any
if error_message:
    st.error(error_message)

# Initialize chat history in Streamlit session state if not already present
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Display chat history
for message in st.session_state.chat_history:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Input field for the user's message
user_prompt = st.chat_input("Ask the Chatbot about the image...")

if user_prompt:
    st.chat_message("user").markdown(user_prompt)
    st.session_state.chat_history.append({"role": "user", "content": user_prompt})

    # Send the user's message to the LLM and get a response
    messages = [
        {
            "role": "system",
            "content": "You are a helpful, accurate image conversational assistant. "
            "You don't hallucinate, and your answers are very precise and have a positive approach. "
            "The caption of the image is: " + caption,
        },
        *st.session_state.chat_history,
    ]

    response = client.chat.completions.create(
        model="llama-3.1-8b-instant", messages=messages
    )

    assistant_response = response.choices[0].message.content
    st.session_state.chat_history.append(
        {"role": "assistant", "content": assistant_response}
    )

    # Display the LLM's response
    with st.chat_message("assistant"):
        st.markdown(assistant_response)
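# --- Usage notes (assumptions, not part of the app logic) ---
# This sketch assumes a `config.json` next to this script holding the Groq key, e.g.:
#   {"GROQ_API_KEY": "<your-groq-api-key>"}
# and that `final_captioner.generate_final_caption(image)` returns a plain-text caption string.
# With those in place, the app is launched the usual Streamlit way (file name is illustrative):
#   streamlit run app.py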