File size: 6,139 Bytes
b056a88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
"""
Visual Question Answering Streamlit Application
"""

import html
import logging
import os
import sys
import time
from datetime import datetime

import streamlit as st
from PIL import Image

# Make the application directory importable so the project-local imports
# further down resolve regardless of the current working directory.
# Streamlit re-executes this script on every user interaction while sys.path
# is process-wide, so only append the entry when it is not already present.
_app_dir = os.path.dirname(os.path.abspath(__file__))
if _app_dir not in sys.path:
    sys.path.append(_app_dir)

# Configure logging: one timestamped file under <app dir>/logs plus the console.
log_dir = os.path.join(_app_dir, "logs")
os.makedirs(log_dir, exist_ok=True)
# NOTE: on a rerun within the same process this path is recomputed with a new
# timestamp, but logging keeps writing to the file opened on the first run.
log_file = os.path.join(
    log_dir, f"vqa_app_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
)

# logging.basicConfig() does nothing once the root logger has handlers, but a
# FileHandler passed to it is still constructed first and opens its file.
# Guarding the call avoids leaving a fresh empty log file behind on every
# script rerun.
if not logging.getLogger().handlers:
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
    )
logger = logging.getLogger("vqa_app")

# Import modules
from models import VQAInference
from utils.image_utils import resize_image

# Sidebar label -> model identifier that is passed on when loading a model
MODEL_OPTIONS = dict(BLIP="blip", ViLT="vilt")

# Make sure <app directory>/static/uploads exists
uploads_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "static",
    "uploads",
)
os.makedirs(uploads_dir, exist_ok=True)

# Page-wide Streamlit settings: tab title and icon, wide layout, sidebar open
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="Visual Question Answering",
    page_icon="🔍",
)


@st.cache_resource
def _load_model_cached(model_name):
    """Build the ``VQAInference`` wrapper for ``model_name`` and cache it.

    Errors are deliberately not handled here: the cache stores whatever this
    function returns, so swallowing an exception and returning ``None`` would
    cache the failure and prevent any later retry.
    """
    logger.info(f"Loading model: {model_name}")
    return VQAInference(model_name=model_name)


def load_model(model_name):
    """Load the VQA model with caching for better performance.

    Args:
        model_name: Model identifier forwarded to ``VQAInference``.

    Returns:
        The cached ``VQAInference`` instance, or ``None`` if loading failed.
        On failure the error is logged and shown in the UI; nothing is
        cached, so the next call attempts the load again.
    """
    try:
        return _load_model_cached(model_name)
    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception(f"Error loading model: {str(e)}")
        st.error(f"Failed to load model: {str(e)}")
        return None


# Keep ``load_model.clear()`` working for callers that relied on the cache
# API exposed by the previously decorated function.
load_model.clear = _load_model_cached.clear


def process_image_and_question(image_file, question, model_name):
    """Answer ``question`` about the image in ``image_file``.

    The image is opened, converted to RGB and resized, then passed with the
    question to the model obtained from ``load_model(model_name)``.

    Returns:
        A dict with keys ``"answer"`` (the model's prediction) and
        ``"processing_time"`` (elapsed wall time formatted as
        ``"<seconds> seconds"``), or ``None`` if the model could not be
        loaded or any step raised an exception.
    """
    started_at = time.time()

    try:
        # Decode the upload and normalise it to three-channel RGB
        original = Image.open(image_file).convert("RGB")
        logger.info(f"Image loaded, size: {original.size}")

        # Scale the picture with the project helper
        resized = resize_image(original)
        logger.info(f"Image resized to: {resized.size}")

        # Fetch the model; load_model returns None when loading failed
        vqa = load_model(model_name)
        if vqa is not None:
            logger.info(f"Generating answer for question: '{question}'")
            prediction = vqa.predict(resized, question)
            logger.info(f"Answer generated: '{prediction}'")

            # The elapsed time covers image preparation, model load and inference
            elapsed = time.time() - started_at
            return {
                "answer": prediction,
                "processing_time": f"{elapsed:.2f} seconds",
            }
        return None
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}", exc_info=True)
        return None


def _render_answer(question, result):
    """Display the question, the HTML-escaped answer and the processing time."""
    st.success("Answer generated successfully!")

    st.markdown("#### Question:")
    st.write(question)

    st.markdown("#### Answer:")
    # The answer is model-generated text placed inside raw HTML that is
    # rendered with unsafe_allow_html=True, so escape it to keep characters
    # such as "<" or "&" from being interpreted as markup.
    safe_answer = html.escape(str(result["answer"]))
    st.markdown(
        f"<div style='background-color: #f0f2f6; padding: 20px; border-radius: 5px;'>{safe_answer}</div>",
        unsafe_allow_html=True,
    )

    st.markdown("#### Processing Time:")
    st.text(result["processing_time"])


def main():
    """Main function for Streamlit app.

    Builds the page: a sidebar for choosing the model, a left column with the
    image uploader, question box and submit button, a right column showing
    the answer (or a hint/warning when input is missing), and a closing
    "about" section with example questions.
    """
    # Header
    st.title("Visual Question Answering")
    st.markdown("Upload an image, ask a question, and get AI-powered answers")

    # Sidebar for model selection
    st.sidebar.title("Model Options")
    selected_model_name = st.sidebar.radio(
        "Choose a model:", options=list(MODEL_OPTIONS.keys()), index=0
    )
    model_name = MODEL_OPTIONS[selected_model_name]

    st.sidebar.markdown("---")
    st.sidebar.markdown("## About the Models")
    st.sidebar.markdown("**BLIP**: General purpose VQA with free-form answers")
    st.sidebar.markdown("**ViLT**: Better for yes/no questions and specific categories")

    # Main content - two columns
    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("### Upload & Ask")
        uploaded_file = st.file_uploader(
            "Upload an image:", type=["jpg", "jpeg", "png", "bmp", "gif"]
        )

        question = st.text_input(
            "Your question about the image:", placeholder="E.g., What is in this image?"
        )
        # Treat a whitespace-only question the same as an empty one.
        question = question.strip()

        submit_button = st.button(
            "Get Answer", type="primary", use_container_width=True
        )

        # Preview uploaded image
        if uploaded_file is not None:
            st.markdown("### Image Preview")
            st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    with col2:
        st.markdown("### AI Answer")

        if not submit_button:
            st.info("AI answers will appear here after you submit your question")
        elif uploaded_file is None:
            st.warning("Please upload an image first.")
        elif not question:
            st.warning("Please enter a question about the image.")
        else:
            # Only the model call runs under the spinner; results render after.
            with st.spinner("Generating answer..."):
                result = process_image_and_question(uploaded_file, question, model_name)

            if result:
                _render_answer(question, result)
            else:
                st.error(
                    "Failed to generate an answer. Please check the image and question, and try again."
                )

    # Information about the application
    st.markdown("---")
    st.markdown("### About Visual Question Answering")
    st.markdown("""
    This application uses multi-modal AI, combining computer vision and natural language processing 
    to answer questions about images. Here are some examples of questions you can ask:
    
    - **Objects**: "What objects are in this image?"
    - **Counting**: "How many people are in this image?"
    - **Colors**: "What color is the car?"
    - **Actions**: "What is the person doing?"
    - **Spatial relations**: "What is to the left of the chair?"
    - **Attributes**: "Is the cat sleeping?"
    """)


# Script entry point: build the page only when this file is executed as the
# main script, not when it is imported as a module.
if __name__ == "__main__":
    main()