Amarthya7 committed on
Commit
b056a88
·
verified ·
1 Parent(s): f165d25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -189
app.py CHANGED
@@ -1,189 +1,189 @@
1
- """
2
- Visual Question Answering Streamlit Application
3
- """
4
-
5
- import logging
6
- import os
7
- import sys
8
- import time
9
- from datetime import datetime
10
-
11
- import streamlit as st
12
- from PIL import Image
13
-
14
- # Configure path to include parent directory
15
- sys.path.append(os.path.dirname(os.path.abspath(__file__)))
16
-
17
- # Configure logging
18
- log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "logs")
19
- os.makedirs(log_dir, exist_ok=True)
20
- log_file = os.path.join(
21
- log_dir, f"vqa_app_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
22
- )
23
-
24
- logging.basicConfig(
25
- level=logging.INFO,
26
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
27
- handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
28
- )
29
- logger = logging.getLogger("vqa_app")
30
-
31
- # Import modules
32
- from models import VQAInference
33
- from utils.image_utils import resize_image
34
-
35
- # Global variables
36
- MODEL_OPTIONS = {"BLIP": "blip", "ViLT": "vilt"}
37
-
38
- # Setup directories
39
- uploads_dir = os.path.join(
40
- os.path.dirname(os.path.abspath(__file__)), "static", "uploads"
41
- )
42
- os.makedirs(uploads_dir, exist_ok=True)
43
-
44
- # Configure page
45
- st.set_page_config(
46
- page_title="Visual Question Answering",
47
- page_icon="🔍",
48
- layout="wide",
49
- initial_sidebar_state="expanded",
50
- )
51
-
52
-
53
- @st.cache_resource
54
- def load_model(model_name):
55
- """Load the VQA model with caching for better performance"""
56
- try:
57
- logger.info(f"Loading model: {model_name}")
58
- return VQAInference(model_name=model_name)
59
- except Exception as e:
60
- logger.error(f"Error loading model: {str(e)}")
61
- st.error(f"Failed to load model: {str(e)}")
62
- return None
63
-
64
-
65
- def process_image_and_question(image_file, question, model_name):
66
- """Process the uploaded image and question to generate an answer"""
67
- start_time = time.time()
68
-
69
- try:
70
- # Load image
71
- image = Image.open(image_file).convert("RGB")
72
- logger.info(f"Image loaded, size: {image.size}")
73
-
74
- # Resize image
75
- image = resize_image(image)
76
- logger.info(f"Image resized to: {image.size}")
77
-
78
- # Load model
79
- model = load_model(model_name)
80
- if model is None:
81
- return None
82
-
83
- # Generate answer
84
- logger.info(f"Generating answer for question: '{question}'")
85
- answer = model.predict(image, question)
86
- logger.info(f"Answer generated: '{answer}'")
87
-
88
- # Calculate processing time
89
- processing_time = time.time() - start_time
90
-
91
- return {"answer": answer, "processing_time": f"{processing_time:.2f} seconds"}
92
- except Exception as e:
93
- logger.error(f"Error processing request: {str(e)}", exc_info=True)
94
- return None
95
-
96
-
97
- def main():
98
- """Main function for Streamlit app"""
99
- # Header
100
- st.title("Visual Question Answering")
101
- st.markdown("Upload an image, ask a question, and get AI-powered answers")
102
-
103
- # Sidebar for model selection
104
- st.sidebar.title("Model Options")
105
- selected_model_name = st.sidebar.radio(
106
- "Choose a model:", options=list(MODEL_OPTIONS.keys()), index=0
107
- )
108
- model_name = MODEL_OPTIONS[selected_model_name]
109
-
110
- st.sidebar.markdown("---")
111
- st.sidebar.markdown("## About the Models")
112
- st.sidebar.markdown("**BLIP**: General purpose VQA with free-form answers")
113
- st.sidebar.markdown("**ViLT**: Better for yes/no questions and specific categories")
114
-
115
- # Main content - two columns
116
- col1, col2 = st.columns([1, 1])
117
-
118
- with col1:
119
- st.markdown("### Upload & Ask")
120
- uploaded_file = st.file_uploader(
121
- "Upload an image:", type=["jpg", "jpeg", "png", "bmp", "gif"]
122
- )
123
-
124
- question = st.text_input(
125
- "Your question about the image:", placeholder="E.g., What is in this image?"
126
- )
127
-
128
- submit_button = st.button(
129
- "Get Answer", type="primary", use_container_width=True
130
- )
131
-
132
- # Preview uploaded image
133
- if uploaded_file is not None:
134
- st.markdown("### Image Preview")
135
- st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
136
-
137
- with col2:
138
- st.markdown("### AI Answer")
139
-
140
- # Process when submit button is clicked
141
- if submit_button and uploaded_file is not None and question:
142
- with st.spinner("Generating answer..."):
143
- result = process_image_and_question(uploaded_file, question, model_name)
144
-
145
- if result:
146
- st.success("Answer generated successfully!")
147
-
148
- # Display results
149
- st.markdown("#### Question:")
150
- st.write(question)
151
-
152
- st.markdown("#### Answer:")
153
- st.markdown(
154
- f"<div style='background-color: #f0f2f6; padding: 20px; border-radius: 5px;'>{result['answer']}</div>",
155
- unsafe_allow_html=True,
156
- )
157
-
158
- st.markdown("#### Processing Time:")
159
- st.text(result["processing_time"])
160
- else:
161
- st.error(
162
- "Failed to generate an answer. Please check the image and question, and try again."
163
- )
164
-
165
- elif not uploaded_file and submit_button:
166
- st.warning("Please upload an image first.")
167
- elif not question and submit_button:
168
- st.warning("Please enter a question about the image.")
169
- else:
170
- st.info("AI answers will appear here after you submit your question")
171
-
172
- # Information about the application
173
- st.markdown("---")
174
- st.markdown("### About Visual Question Answering")
175
- st.markdown("""
176
- This application uses multi-modal AI, combining computer vision and natural language processing
177
- to answer questions about images. Here are some examples of questions you can ask:
178
-
179
- - **Objects**: "What objects are in this image?"
180
- - **Counting**: "How many people are in this image?"
181
- - **Colors**: "What color is the car?"
182
- - **Actions**: "What is the person doing?"
183
- - **Spatial relations**: "What is to the left of the chair?"
184
- - **Attributes**: "Is the cat sleeping?"
185
- """)
186
-
187
-
188
- if __name__ == "__main__":
189
- main()
 
1
+ """
2
+ Visual Question Answering Streamlit Application
3
+ """
4
+
5
+ import logging
6
+ import os
7
+ import sys
8
+ import time
9
+ from datetime import datetime
10
+
11
+ import streamlit as st
12
+ from PIL import Image
13
+
14
# Make this file's own directory importable so the project-local ``models``
# and ``utils`` packages resolve. Module-level code in a Streamlit script is
# re-executed on reruns, so only add the entry when it is missing; an
# unconditional append would grow sys.path every time.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
if _APP_DIR not in sys.path:
    sys.path.append(_APP_DIR)

# Configure logging: one timestamped file under ./logs plus the console.
log_dir = os.path.join(_APP_DIR, "logs")
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(
    log_dir, f"vqa_app_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
)
# Application-wide logger used by every function in this module.
logger = logging.getLogger("vqa_app")
30
+
31
+ # Import modules
32
+ from models import VQAInference
33
+ from utils.image_utils import resize_image
34
+
35
# Sidebar label -> model identifier handed to the loader.
MODEL_OPTIONS = dict(BLIP="blip", ViLT="vilt")

# Ensure the upload directory (<this file's dir>/static/uploads) exists.
_module_dir = os.path.dirname(os.path.abspath(__file__))
uploads_dir = os.path.join(_module_dir, "static", "uploads")
os.makedirs(uploads_dir, exist_ok=True)

# Page-level Streamlit settings: wide layout with the sidebar open.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="Visual Question Answering",
    page_icon="🔍",
)
51
+
52
+
53
@st.cache_resource
def _load_model_cached(model_name):
    """Construct the VQA model for ``model_name``, cached by Streamlit.

    Exceptions are deliberately NOT caught here: a call that raises stores
    nothing in the cache, so a failed load can be retried on the next call.
    """
    logger.info("Loading model: %s", model_name)
    return VQAInference(model_name=model_name)


def load_model(model_name):
    """Return the cached VQA model for ``model_name``, or ``None`` on failure.

    Args:
        model_name: Identifier forwarded to ``VQAInference(model_name=...)``.

    Returns:
        The ``VQAInference`` instance, or ``None`` if construction raised.
        On failure the error is logged with its traceback and shown to the
        user through ``st.error``.
    """
    # The try/except lives outside the cached function so that a failure
    # result (None) is never memoised for this model name.
    try:
        return _load_model_cached(model_name)
    except Exception as e:
        logger.exception("Error loading model: %s", e)
        st.error(f"Failed to load model: {str(e)}")
        return None
63
+
64
+
65
def process_image_and_question(image_file, question, model_name):
    """Answer ``question`` about the image in ``image_file``.

    Args:
        image_file: Anything ``PIL.Image.open`` accepts (path or file-like
            object), e.g. the object returned by ``st.file_uploader``.
        question: The question text passed to the model's ``predict``.
        model_name: Model identifier passed to ``load_model``.

    Returns:
        ``{"answer": <model output>, "processing_time": "<x.xx> seconds"}``
        on success, or ``None`` if the model could not be loaded or any step
        raised (the exception is logged with its traceback).
    """
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    try:
        # Load image and normalise it to RGB.
        image = Image.open(image_file).convert("RGB")
        logger.info("Image loaded, size: %s", image.size)

        # Resize image using the project helper (exact policy is defined in
        # utils.image_utils.resize_image, not visible here).
        image = resize_image(image)
        logger.info("Image resized to: %s", image.size)

        # Load model; load_model reports its own failure and returns None.
        model = load_model(model_name)
        if model is None:
            return None

        # Generate answer
        logger.info("Generating answer for question: '%s'", question)
        answer = model.predict(image, question)
        logger.info("Answer generated: '%s'", answer)

        # Elapsed time covers image loading, model loading and inference.
        processing_time = time.perf_counter() - start_time

        return {"answer": answer, "processing_time": f"{processing_time:.2f} seconds"}
    except Exception as e:
        # Boundary handler: the UI only needs to know the request failed.
        logger.exception("Error processing request: %s", e)
        return None
95
+
96
+
97
def main():
    """Render the Streamlit page and handle one submit cycle.

    Layout: a sidebar for model selection, a left column with the image
    uploader, question box, submit button and image preview, and a right
    column that shows the answer (or a warning/info message), followed by a
    static "about" section.
    """
    # Function-scope import: only needed to escape the model answer below.
    import html

    # Header
    st.title("Visual Question Answering")
    st.markdown("Upload an image, ask a question, and get AI-powered answers")

    # Sidebar for model selection
    st.sidebar.title("Model Options")
    selected_model_name = st.sidebar.radio(
        "Choose a model:", options=list(MODEL_OPTIONS.keys()), index=0
    )
    model_name = MODEL_OPTIONS[selected_model_name]

    st.sidebar.markdown("---")
    st.sidebar.markdown("## About the Models")
    st.sidebar.markdown("**BLIP**: General purpose VQA with free-form answers")
    st.sidebar.markdown("**ViLT**: Better for yes/no questions and specific categories")

    # Main content - two columns
    col1, col2 = st.columns([1, 1])

    with col1:
        st.markdown("### Upload & Ask")
        uploaded_file = st.file_uploader(
            "Upload an image:", type=["jpg", "jpeg", "png", "bmp", "gif"]
        )

        question = st.text_input(
            "Your question about the image:", placeholder="E.g., What is in this image?"
        )

        submit_button = st.button(
            "Get Answer", type="primary", use_container_width=True
        )

        # Preview uploaded image
        if uploaded_file is not None:
            st.markdown("### Image Preview")
            st.image(uploaded_file, caption="Uploaded Image", use_container_width=True)

    with col2:
        st.markdown("### AI Answer")

        # Process when submit button is clicked
        if submit_button and uploaded_file is not None and question:
            with st.spinner("Generating answer..."):
                result = process_image_and_question(uploaded_file, question, model_name)

            if result:
                st.success("Answer generated successfully!")

                # Display results
                st.markdown("#### Question:")
                st.write(question)

                st.markdown("#### Answer:")
                # The answer is injected into raw HTML, so escape it first:
                # any markup characters in the model output must be shown as
                # text rather than interpreted by the browser.
                safe_answer = html.escape(str(result["answer"]))
                st.markdown(
                    f"<div style='background-color: #f0f2f6; padding: 20px; border-radius: 5px;'>{safe_answer}</div>",
                    unsafe_allow_html=True,
                )

                st.markdown("#### Processing Time:")
                st.text(result["processing_time"])
            else:
                st.error(
                    "Failed to generate an answer. Please check the image and question, and try again."
                )

        elif not uploaded_file and submit_button:
            st.warning("Please upload an image first.")
        elif not question and submit_button:
            st.warning("Please enter a question about the image.")
        else:
            st.info("AI answers will appear here after you submit your question")

    # Information about the application
    st.markdown("---")
    st.markdown("### About Visual Question Answering")
    st.markdown("""
    This application uses multi-modal AI, combining computer vision and natural language processing
    to answer questions about images. Here are some examples of questions you can ask:

    - **Objects**: "What objects are in this image?"
    - **Counting**: "How many people are in this image?"
    - **Colors**: "What color is the car?"
    - **Actions**: "What is the person doing?"
    - **Spatial relations**: "What is to the left of the chair?"
    - **Attributes**: "Is the cat sleeping?"
    """)


if __name__ == "__main__":
    main()