|
import streamlit as st |
|
import requests |
|
import pandas as pd |
|
import json |
|
import os |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
import base64 |
|
from io import BytesIO |
|
from PIL import Image, ImageEnhance |
|
import time |
|
from typing import Dict, Any, List, Optional |
|
import uuid |
|
import asyncio |
|
from pydantic import BaseModel |
|
import traceback |
|
|
|
|
|
|
|
from utils import (search_news, analyze_article_sentiment, perform_comparative_analysis, |
|
translate_to_hindi, text_to_speech, prepare_final_report, NewsArticle) |
|
|
|
|
|
|
|
|
|
# Base URL of a backend API on the local machine (port 8000).
# NOTE(review): nothing in the visible part of this file reads this constant.
API_BASE_URL = "http://localhost:8000"
|
|
|
|
|
class CompanyRequest(BaseModel):
    # Pydantic model with a single required string field: the company name.
    company_name: str
|
|
|
class TextToSpeechRequest(BaseModel):
    # Pydantic model describing a text-to-speech request.
    # text: required string to be converted.
    text: str
    # output_filename: optional target file name; defaults to None when omitted.
    output_filename: Optional[str] = None
|
|
|
class SentimentAnalysisRequest(BaseModel):
    # Pydantic model holding a list of article dictionaries.
    # The dictionaries are typed Dict[str, Any], so their keys are not
    # constrained by this model.
    articles: List[Dict[str, Any]]
|
|
|
|
|
async def get_news(company_name: str, num_articles: int = 5) -> Dict[str, Any]:
    """Search news for a company and return the articles as plain dictionaries.

    Args:
        company_name: Company whose news should be searched.
        num_articles: Number of articles requested from ``search_news``.
            Defaults to 5, the value that used to be hard-coded here.

    Returns:
        ``{"articles": [...]}`` holding one ``to_dict()`` result per article,
        or ``{"error": message}`` when the search yields nothing or any step
        raises. The function never propagates an exception.
    """
    try:
        articles = search_news(company_name, num_articles=num_articles)
        if not articles:
            return {"error": f"No news articles found for {company_name}"}
        return {"articles": [article.to_dict() for article in articles]}
    except Exception as e:
        # Deliberately broad: every failure is reported through the returned
        # dictionary instead of being raised.
        return {"error": str(e)}
|
|
|
async def analyze_sentiment(articles: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Run per-article and comparative sentiment analysis on article dicts.

    Each input dictionary must provide ``title``, ``url`` and ``content``;
    ``summary``, ``source``, ``date`` and ``sentiment`` fall back to an empty
    string and ``topics`` to an empty list when absent.

    Returns:
        ``{"sentiment_analysis": {"detailed_sentiment": [...],
        "comparative_analysis": ...}}`` on success, or ``{"error": message}``
        when any step raises (including a missing required key).
    """
    try:
        # Rebuild NewsArticle objects from their dictionary form.
        news_articles = [
            NewsArticle(
                title=raw["title"],
                url=raw["url"],
                content=raw["content"],
                summary=raw.get("summary", ""),
                source=raw.get("source", ""),
                date=raw.get("date", ""),
                sentiment=raw.get("sentiment", ""),
                topics=raw.get("topics", []),
            )
            for raw in articles
        ]
        per_article = [analyze_article_sentiment(item) for item in news_articles]
        across_articles = perform_comparative_analysis(news_articles)
    except Exception as e:
        return {"error": str(e)}
    else:
        return {
            "sentiment_analysis": {
                "detailed_sentiment": per_article,
                "comparative_analysis": across_articles,
            }
        }
|
|
|
async def generate_speech(text: str, output_filename: Optional[str] = None) -> Dict[str, Any]:
    """Translate ``text`` to Hindi and synthesise it into an audio file.

    Args:
        text: Text handed to ``translate_to_hindi``.
        output_filename: Target file name. When omitted or empty, a random
            ``audio_files/<uuid>.mp3`` name is generated; a name that does not
            already start with ``audio_files/`` is placed under that directory.

    Returns:
        ``{"audio_file": path, "text": hindi_text}`` on success, or
        ``{"error": message}`` when synthesis returns a falsy value or any
        step raises.
    """
    audio_dir = "audio_files"
    try:
        if not output_filename:
            output_filename = f"{audio_dir}/{uuid.uuid4().hex}.mp3"
        elif not output_filename.startswith(f"{audio_dir}/"):
            output_filename = f"{audio_dir}/{output_filename}"
        # NOTE(review): the name is joined without normalisation, so a value
        # containing ".." can point outside audio_files - confirm callers never
        # forward untrusted file names.

        # Create the actual parent directory of the target file, so names such
        # as "sub/clip.mp3" work as well as flat ones.
        os.makedirs(os.path.dirname(output_filename) or audio_dir, exist_ok=True)

        hindi_text = translate_to_hindi(text)
        audio_file = text_to_speech(hindi_text, output_filename)
        if not audio_file:
            return {"error": "Failed to generate audio file"}
        return {"audio_file": audio_file, "text": hindi_text}
    except Exception as e:
        # Failures are reported through the returned dictionary, never raised.
        return {"error": str(e)}
|
|
|
async def complete_analysis(company_name: str, num_articles: int = 5) -> Dict[str, Any]:
    """Run the full pipeline: search, comparative analysis, report and audio.

    Args:
        company_name: Company whose news should be analysed.
        num_articles: Number of articles requested from ``search_news``.
            Defaults to 5, the value that used to be hard-coded here.

    Returns:
        On success a dictionary with the keys ``Company``, ``Articles``,
        ``Comparative Sentiment Score`` (``Sentiment Distribution``,
        ``Coverage Differences``, ``Topic Overlap``), ``Final Sentiment
        Analysis``, ``Hindi Summary`` and ``Audio``; ``_audio_file_path`` is
        added when speech synthesis returned a path. On failure
        ``{"error": user_facing_message}``. The function never raises.
    """
    try:
        articles = search_news(company_name, num_articles=num_articles)
        if not articles:
            return {"error": f"No news articles found for {company_name}"}

        comparative_analysis = perform_comparative_analysis(articles)
        final_report = prepare_final_report(company_name, articles, comparative_analysis)

        # Same precaution generate_speech takes: make sure the output directory
        # exists before asking the TTS helper to write into it.
        os.makedirs("audio_files", exist_ok=True)
        output_filename = f"audio_files/{uuid.uuid4().hex}.mp3"
        hindi_text = final_report["Hindi Summary"]
        audio_file = text_to_speech(hindi_text, output_filename)

        topic_overlap = comparative_analysis["Topic Overlap"]
        formatted_response = {
            "Company": company_name,
            "Articles": final_report["Articles"],
            "Comparative Sentiment Score": {
                "Sentiment Distribution": comparative_analysis["Sentiment Distribution"],
                "Coverage Differences": comparative_analysis["Coverage Differences"],
                "Topic Overlap": {
                    "Common Topics": topic_overlap["Common Topics Across All"],
                },
            },
            "Final Sentiment Analysis": comparative_analysis["Final Sentiment Analysis"],
            "Hindi Summary": hindi_text,
        }
        score = formatted_response["Comparative Sentiment Score"]

        # Keys of "Unique Topics By Article" are article indices; the report
        # labels them starting from 1.
        for article_idx, topics in topic_overlap["Unique Topics By Article"].items():
            score["Topic Overlap"][f"Unique Topics in Article {int(article_idx) + 1}"] = topics

        # With a single article there is nothing to compare against, so the
        # computed differences are replaced by an explanatory entry.
        if len(articles) <= 1:
            score["Coverage Differences"] = [
                {
                    "Comparison": f"Only one article about {company_name} was found, limiting comparative analysis.",
                    "Impact": "Unable to compare coverage across multiple sources for more comprehensive insights.",
                }
            ]

        if audio_file:
            formatted_response["Audio"] = "[Play Hindi Speech]"
            formatted_response["_audio_file_path"] = audio_file
        else:
            formatted_response["Audio"] = "Failed to generate audio"

        return formatted_response
    except Exception as e:
        # The user only sees a generic message, so keep the real cause visible
        # on stderr instead of discarding it.
        traceback.print_exc()

        detail = str(e).lower()
        user_message = "An error occurred during analysis. "
        if "timeout" in detail:
            user_message += "There was a timeout when connecting to news sources. Please try again."
        elif "connection" in detail:
            user_message += "There was a connection issue. Please check your internet."
        elif "not found" in detail:
            user_message += f"No information could be found for {company_name}."
        else:
            user_message += "Please try again."
        return {"error": user_message}
|
|
|
|
|
def generate_example_output(company_name: str) -> str:
    """Run ``complete_analysis`` and return its result as example-format JSON.

    The JSON object contains ``Company``, ``Articles``, ``Comparative
    Sentiment Score``, ``Final Sentiment Analysis`` and ``Audio``. Keys that
    are missing from the analysis result (for example when it is an error
    payload) are filled with empty defaults.

    Args:
        company_name: Company to analyse.

    Returns:
        The formatted result serialised with a two-space indent.
    """
    # asyncio.run creates a fresh event loop, runs the coroutine and closes the
    # loop again; the previous hand-made loop was never closed.
    result = asyncio.run(complete_analysis(company_name))

    empty_score = {
        "Sentiment Distribution": {},
        "Coverage Differences": [],
        "Topic Overlap": {},
    }
    formatted_output = {
        "Company": result.get("Company", company_name),
        "Articles": result.get("Articles", []),
        "Comparative Sentiment Score": result.get("Comparative Sentiment Score", empty_score),
        "Final Sentiment Analysis": result.get("Final Sentiment Analysis", ""),
        "Audio": result.get("Audio", "No audio available"),
    }
    return json.dumps(formatted_output, indent=2)
|
|
|
def get_sentiment_color(sentiment: str) -> str:
    """Map a sentiment label to the CSS class name used for its colour.

    ``"Positive"`` and ``"Negative"`` map to ``"positive"`` and
    ``"negative"``; every other value (the match is case-sensitive) maps to
    ``"neutral"``.
    """
    known_labels = (("Positive", "positive"), ("Negative", "negative"))
    for label, css_class in known_labels:
        if sentiment == label:
            return css_class
    return "neutral"
|
|
|
def plot_sentiment_distribution(sentiment_data: Dict[str, int]):
    """Build a bar chart of article counts per sentiment label.

    Args:
        sentiment_data: Mapping from sentiment label to article count. Only
            ``Positive``, ``Neutral`` and ``Negative`` are read; a missing
            label counts as 0.

    Returns:
        A matplotlib ``Figure`` with one coloured bar per label and the count
        written above each bar.
    """
    # Function-scope import: build the figure directly instead of through
    # pyplot. Figures made with plt.subplots() stay registered in pyplot's
    # global state until closed, which piles up across reruns of the app.
    from matplotlib.figure import Figure

    labels = ["Positive", "Neutral", "Negative"]
    values = [sentiment_data.get(label, 0) for label in labels]
    colors = ["#059669", "#6B7280", "#DC2626"]

    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()
    ax.bar(labels, values, color=colors)
    ax.set_title("Sentiment Distribution", fontsize=16, fontweight='bold')
    ax.set_ylabel("Number of Articles", fontsize=12)
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Write each count slightly above the top of its bar.
    for position, count in enumerate(values):
        ax.text(position, count + 0.1, str(count), ha='center', fontweight='bold')
    return fig
|
|
|
def display_article(article: Dict[str, Any], index: int):
    """Render one article as a styled card in the Streamlit page.

    The card shows the numbered title, the sentiment label coloured through
    ``get_sentiment_color``, the summary and, when present, the topic tags.

    Args:
        article: Article dictionary. ``Title`` is required; ``Sentiment``
            defaults to ``"Neutral"``, ``Summary`` to ``"No summary
            available."`` and ``Topics`` is optional.
        index: Zero-based position of the article; displayed as ``index + 1``.

    Raises:
        KeyError: If ``article`` has no ``Title`` key.
    """
    from html import escape

    def _safe(value: Any) -> str:
        # Article text comes from external news sources, so it is escaped
        # before being placed into raw HTML. Whitespace is collapsed because a
        # blank line inside the text would end the Markdown HTML block early.
        return escape(" ".join(str(value).split()))

    sentiment = article.get("Sentiment", "Neutral")
    sentiment_class = get_sentiment_color(sentiment)

    parts = [
        "<div class='card'>",
        f"<h3 class='article-title'>{index + 1}. {_safe(article['Title'])}</h3>",
        f"<span class='{sentiment_class}'>{_safe(sentiment)}</span>",
        f"<div class='article-summary'>{_safe(article.get('Summary', 'No summary available.'))}</div>",
    ]

    topics = article.get("Topics")
    if topics:
        tags = "".join(f"<span class='topic-tag'>{_safe(topic)}</span>" for topic in topics)
        parts.append(f"<div>{tags}</div>")
    parts.append("</div>")

    # Emit the whole card in a single call: every st.markdown call produces its
    # own page element, so a <div> opened in one call and closed in another
    # does not wrap the content rendered in between.
    st.markdown("\n".join(parts), unsafe_allow_html=True)
|
|
|
|
|
# Page-level settings: browser tab title and icon, wide layout, open sidebar.
st.set_page_config(
    page_title="News Summarization & TTS",
    page_icon="📰",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Stylesheet defining the CSS classes referenced by the HTML snippets that the
# rest of the script renders (cards, sentiment colours, topic tags, ...).
page_css = """
<style>
.main-header { font-size: 2.5rem; font-weight: 700; color: #1E3A8A; margin-bottom: 1rem; }
.sub-header { font-size: 1.5rem; font-weight: 600; color: #2563EB; margin-top: 1rem; margin-bottom: 0.5rem; }
.card { padding: 1.5rem; border-radius: 0.5rem; background-color: #F8FAFC; border: 1px solid #E2E8F0; margin-bottom: 1rem; }
.positive { color: #059669; font-weight: 600; }
.negative { color: #DC2626; font-weight: 600; }
.neutral { color: #6B7280; font-weight: 600; }
.topic-tag { display: inline-block; padding: 0.25rem 0.5rem; border-radius: 2rem; background-color: #E5E7EB; color: #1F2937; font-size: 0.75rem; margin-right: 0.5rem; margin-bottom: 0.5rem; }
.audio-container { width: 100%; padding: 1rem; background-color: #F3F4F6; border-radius: 0.5rem; margin-top: 1rem; }
.info-text { font-size: 0.9rem; color: #4B5563; }
.article-title { font-size: 1.2rem; font-weight: 600; color: #111827; margin-bottom: 0.5rem; margin-top: 0.5rem; }
.article-summary { font-size: 0.9rem; color: #374151; margin-bottom: 0.5rem; }
.section-divider { height: 1px; background-color: #E5E7EB; margin: 1.5rem 0; }
</style>
"""
st.markdown(page_css, unsafe_allow_html=True)

# Page heading followed by a short description of what the app does.
st.markdown("<h1 class='main-header'>📰 News Summarization & Text-to-Speech</h1>", unsafe_allow_html=True)
intro_html = """
<p class='info-text'>
This application extracts news articles about a company, performs sentiment analysis, conducts comparative analysis,
and generates a text-to-speech output in Hindi. Enter a company name to get started.
</p>
"""
st.markdown(intro_html, unsafe_allow_html=True)
|
|
|
|
|
# Sidebar header: icon and title.
st.sidebar.image("https://cdn-icons-png.flaticon.com/512/2593/2593073.png", width=100)
st.sidebar.title("News Analysis Settings")

# The company can either be typed freely or picked from a fixed list.
company_input_method = st.sidebar.radio(
    "Select company input method:",
    options=["Text Input", "Choose from List"],
)

if company_input_method != "Text Input":
    companies = ["Apple", "Google", "Microsoft", "Amazon", "Tesla", "Meta", "Netflix", "Uber", "Airbnb", "Twitter"]
    company_name = st.sidebar.selectbox("Select Company:", companies)
else:
    company_name = st.sidebar.text_input("Enter Company Name:", placeholder="e.g., Tesla")

# Remaining controls, created in the order they appear in the sidebar.
# NOTE(review): max_articles and audio_speed are assigned here but not read
# anywhere else in the visible source - confirm whether they should be wired
# into the analysis and speech steps.
max_articles = st.sidebar.slider("Maximum Articles to Analyze:", min_value=5, max_value=20, value=10)
analyze_button = st.sidebar.button("Analyze Company News", type="primary")
audio_speed = st.sidebar.select_slider("TTS Speech Speed:", options=["Slow", "Normal", "Fast"], value="Normal")
show_json = st.sidebar.checkbox("Show JSON output in example format")

# Collapsible summary of the app's features.
with st.sidebar.expander("About This App"):
    st.markdown("""
    This application performs:
    - News extraction from multiple sources
    - Sentiment analysis of the content
    - Topic identification and comparative analysis
    - Text-to-speech conversion to Hindi
    """)
|
|
|
|
|
# Main view: run the analysis when the button was pressed and a company is
# set, otherwise show the landing card.
if analyze_button and company_name:
    # Script-scope import: only this rendering section needs html.escape. The
    # company name is typed by the user and article text comes from external
    # sources, so both are escaped before being placed into raw HTML.
    from html import escape

    with st.spinner(f"Analyzing news for {company_name}... This may take a minute"):
        # asyncio.run creates a fresh event loop and closes it afterwards; the
        # previous hand-made loop was never closed.
        response = asyncio.run(complete_analysis(company_name))

    if "error" in response:
        st.error(response["error"])
    else:
        st.markdown(
            f"<h2 class='sub-header'>Analysis Results for {escape(str(response['Company']))}</h2>",
            unsafe_allow_html=True,
        )

        # --- Sentiment overview: summary text on the left, chart on the right.
        col1, col2 = st.columns([2, 1])
        with col1:
            st.markdown("<div class='card'>", unsafe_allow_html=True)
            st.markdown("<h3 class='sub-header'>Sentiment Overview</h3>", unsafe_allow_html=True)
            st.markdown(f"{response['Final Sentiment Analysis']}")
            st.markdown("</div>", unsafe_allow_html=True)
        with col2:
            sentiment_data = response["Comparative Sentiment Score"]["Sentiment Distribution"]
            fig = plot_sentiment_distribution(sentiment_data)
            st.pyplot(fig)

        st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)

        # --- Hindi audio: player, download link and the spoken text.
        if "Audio" in response and response["Audio"] == "[Play Hindi Speech]":
            st.markdown("<h3 class='sub-header'>Hindi Audio Summary</h3>", unsafe_allow_html=True)
            audio_file_path = response.get("_audio_file_path")
            if audio_file_path and os.path.exists(audio_file_path):
                st.markdown("<div class='audio-container'>", unsafe_allow_html=True)
                st.audio(audio_file_path, format="audio/mp3")
                with open(audio_file_path, "rb") as f:
                    audio_bytes = f.read()
                # The download link embeds the audio as a base64 data URI.
                b64 = base64.b64encode(audio_bytes).decode()
                href = f'<a href="data:audio/mp3;base64,{b64}" download="hindi_summary.mp3">Download Hindi Audio</a>'
                st.markdown(href, unsafe_allow_html=True)
                st.markdown("</div>", unsafe_allow_html=True)
            else:
                st.warning("Hindi audio could not be generated.")

            with st.expander("Show Hindi Text"):
                hindi_text = response.get("Hindi Summary", "Hindi text not available.")
                # Split on the danda followed by a space and show one sentence
                # per paragraph, restoring the danda removed by the split.
                paragraphs = hindi_text.split("। ")
                for paragraph in paragraphs:
                    if paragraph.strip():
                        if not paragraph.strip().endswith("।"):
                            paragraph += "।"
                        st.markdown(
                            f"<p style='font-size: 16px; margin-bottom: 10px;'>{escape(paragraph)}</p>",
                            unsafe_allow_html=True,
                        )

        st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)

        # --- Article cards.
        st.markdown("<h3 class='sub-header'>News Articles</h3>", unsafe_allow_html=True)
        articles = response.get("Articles", [])
        if not articles:
            st.info("No articles found for this company.")
        else:
            for i, article in enumerate(articles):
                display_article(article, i)

        st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)

        # --- Comparative analysis: shared topics and coverage differences.
        st.markdown("<h3 class='sub-header'>Comparative Analysis</h3>", unsafe_allow_html=True)
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("<div class='card'>", unsafe_allow_html=True)
            st.markdown("<h4>Common Topics</h4>", unsafe_allow_html=True)
            common_topics = response["Comparative Sentiment Score"]["Topic Overlap"].get("Common Topics", [])
            if common_topics:
                for topic in common_topics:
                    st.markdown(f"<span class='topic-tag'>{escape(str(topic))}</span>", unsafe_allow_html=True)
            else:
                st.info("No common topics found across articles.")
            st.markdown("</div>", unsafe_allow_html=True)
        with col2:
            st.markdown("<div class='card'>", unsafe_allow_html=True)
            st.markdown("<h4>Coverage Comparison</h4>", unsafe_allow_html=True)
            comparisons = response["Comparative Sentiment Score"].get("Coverage Differences", [])
            if comparisons:
                # Only the first three comparisons are shown in the card.
                for i, comparison in enumerate(comparisons[:3]):
                    st.markdown(
                        f"<p><strong>{i+1}.</strong> {escape(str(comparison.get('Comparison', '')))}</p>",
                        unsafe_allow_html=True,
                    )
                    st.markdown(
                        f"<p class='info-text'>{escape(str(comparison.get('Impact', '')))}</p>",
                        unsafe_allow_html=True,
                    )
            else:
                st.info("No comparative insights available.")
            st.markdown("</div>", unsafe_allow_html=True)

        # The complete list of comparisons lives in a collapsible section.
        with st.expander("View All Comparisons"):
            for i, comparison in enumerate(comparisons):
                st.markdown(
                    f"<p><strong>{i+1}.</strong> {escape(str(comparison.get('Comparison', '')))}</p>",
                    unsafe_allow_html=True,
                )
                st.markdown(
                    f"<p class='info-text'>{escape(str(comparison.get('Impact', '')))}</p>",
                    unsafe_allow_html=True,
                )
                st.markdown("<hr>", unsafe_allow_html=True)

        # --- Optional JSON view of the result.
        if show_json:
            st.markdown("<div class='section-divider'></div>", unsafe_allow_html=True)
            st.markdown("<h3 class='sub-header'>Example JSON Format</h3>", unsafe_allow_html=True)
            # Serialise the result that is already on screen. Calling
            # generate_example_output() here would run complete_analysis a
            # second time (new search, new audio file) just to print JSON.
            example_keys = (
                "Company",
                "Articles",
                "Comparative Sentiment Score",
                "Final Sentiment Analysis",
                "Audio",
            )
            json_output = json.dumps({key: response[key] for key in example_keys}, indent=2)
            st.code(json_output, language="json")
else:
    # Landing view shown until an analysis has been requested.
    st.markdown("<div class='card'>", unsafe_allow_html=True)
    st.markdown("<h3 class='sub-header'>Enter a Company Name to Begin Analysis</h3>", unsafe_allow_html=True)
    st.markdown("""
    <p class='info-text'>
    This application will:
    </p>
    <ul class='info-text'>
    <li>Extract news articles from multiple sources</li>
    <li>Analyze sentiment (positive, negative, neutral)</li>
    <li>Identify key topics in each article</li>
    <li>Perform comparative analysis across articles</li>
    <li>Generate Hindi speech output summarizing the findings</li>
    </ul>
    """, unsafe_allow_html=True)
    st.markdown("</div>", unsafe_allow_html=True)
    st.image("https://miro.medium.com/max/1400/1*Ger-949PgQnaje2oa9XMdw.png", caption="Sample sentiment analysis visualization")
|
|
|
# Footer: a divider line followed by a centred attribution, rendered after
# whichever main view was shown above.
footer_snippets = (
    "<div class='section-divider'></div>",
    "<p class='info-text' style='text-align: center;'>News Summarization & Text-to-Speech Application | Developed with Streamlit</p>",
)
for footer_html in footer_snippets:
    st.markdown(footer_html, unsafe_allow_html=True)