File size: 4,294 Bytes
20d02ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b49f52e
20d02ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b49f52e
 
 
 
 
 
 
 
 
 
 
 
 
20d02ac
b49f52e
 
 
20d02ac
 
b49f52e
 
 
 
20d02ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
898864c
b49f52e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Configuration settings for the News Summarization application."""

import os
from dotenv import load_dotenv

# Load environment variables from a .env file (python-dotenv) before any of
# the os.getenv() lookups further down in this module are evaluated.
load_dotenv()

# API Settings - Modified for direct processing
# Host and port are the single source of truth; the base URL is built from
# them so the three values cannot drift apart when one is edited.
API_HOST = "localhost"  # Changed from 0.0.0.0
API_PORT = 8501  # Changed to Streamlit's default port
API_BASE_URL = f"http://{API_HOST}:{API_PORT}"  # Direct URL for local processing

# News Scraping Settings
# Each value is read from the environment variable of the same name and falls
# back to the default shown here when that variable is unset.
ARTICLES_PER_SOURCE = int(os.environ.get("ARTICLES_PER_SOURCE", "10"))
USER_AGENT = os.environ.get(
    "USER_AGENT",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36",
)

# RSS Feed Settings
# Maps a source label to the URL of its RSS feed.
RSS_FEEDS = dict(
    BBC="http://feeds.bbci.co.uk/news/business/rss.xml",
    CNN="http://rss.cnn.com/rss/money_news_international.rss",
    FoxBusiness="http://feeds.foxnews.com/foxbusiness/latest",
)

# Model Settings
# Model identifier strings; how they are loaded is not visible in this module.
SUMMARIZATION_MODEL = "t5-base"
# More advanced financial sentiment model
SENTIMENT_MODEL = "yiyanghkust/finbert-tone"
SENTIMENT_FINE_GRAINED_MODEL = (
    "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
)

# Additional Fine-Grained Sentiment Models
# Maps an analysis category name to a model identifier string.
FINE_GRAINED_MODELS = dict(
    financial="mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
    emotion="j-hartmann/emotion-english-distilroberta-base",
    aspect="yangheng/deberta-v3-base-absa-v1.1",
    esg="yiyanghkust/finbert-esg",
    news_tone="ProsusAI/finbert",
)

# Fine-Grained Sentiment Categories
# Maps a category name to its list of labels. Every entry gets its own list
# object, so mutating one entry never affects another.
SENTIMENT_CATEGORIES = dict(
    financial=["positive", "negative", "neutral"],
    emotion=[
        "joy",
        "sadness",
        "anger",
        "fear",
        "surprise",
        "disgust",
        "neutral",
    ],
    aspect=["positive", "negative", "neutral"],
    esg=["environmental", "social", "governance", "neutral"],
    news_tone=["positive", "negative", "neutral"],
)

# Sentiment Analysis Settings
# Float thresholds, each overridable through the environment variable of the
# same name; the string default is used when the variable is unset.
SENTIMENT_THRESHOLD = float(os.environ.get("SENTIMENT_THRESHOLD", "0.2"))
CONFIDENCE_THRESHOLD = float(os.environ.get("CONFIDENCE_THRESHOLD", "0.6"))
ENSEMBLE_AGREEMENT_THRESHOLD = float(
    os.environ.get("ENSEMBLE_AGREEMENT_THRESHOLD", "0.7")
)

# Entity Recognition Settings
# Entity labels of interest, plus a float threshold that can be overridden
# through the ENTITY_CONFIDENCE_THRESHOLD environment variable.
ENTITY_TYPES = [
    "ORG",
    "PERSON",
    "GPE",
    "MONEY",
    "DATE",
    "TIME",
    "PERCENT",
    "QUANTITY",
]
ENTITY_CONFIDENCE_THRESHOLD = float(
    os.environ.get("ENTITY_CONFIDENCE_THRESHOLD", "0.5")
)

# Sentiment Target Settings
# Both values fall back to the defaults below when the matching environment
# variable is unset.
TARGET_CONTEXT_WINDOW = int(
    os.environ.get("TARGET_CONTEXT_WINDOW", "100")
)  # characters
TARGET_CONFIDENCE_THRESHOLD = float(
    os.environ.get("TARGET_CONFIDENCE_THRESHOLD", "0.5")
)

# Cache Settings
# Each value is overridable through the environment variable of the same name.
CACHE_DIR = os.environ.get("CACHE_DIR", ".cache")
CACHE_EXPIRY = int(os.environ.get("CACHE_EXPIRY", "3600"))  # 1 hour
CACHE_DURATION = int(
    os.environ.get("CACHE_DURATION", "300")
)  # 5 minutes in seconds

# Audio Settings
# Output directory, default language code, and float speed/volume values, each
# overridable through the environment variable of the same name.
AUDIO_OUTPUT_DIR = os.environ.get("AUDIO_OUTPUT_DIR", "audio_output")
DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "hi")  # Hindi
AUDIO_SPEED = float(os.environ.get("AUDIO_SPEED", "1.0"))
AUDIO_VOLUME = float(os.environ.get("AUDIO_VOLUME", "1.0"))

# News Sources
# Maps a source key to a search-URL template; each template contains a single
# "{}" placeholder.
NEWS_SOURCES = dict(
    # Major News Aggregators
    google="https://www.google.com/search?q={}&tbm=nws",
    bing="https://www.bing.com/news/search?q={}",
    yahoo="https://news.search.yahoo.com/search?p={}",
    # Financial News
    reuters="https://www.reuters.com/search/news?blob={}",
    marketwatch="https://www.marketwatch.com/search?q={}&ts=0&tab=All%20News",
    investing="https://www.investing.com/search/?q={}&tab=news",
    # Tech News
    techcrunch="https://techcrunch.com/search/{}",
    zdnet="https://www.zdnet.com/search/?q={}",
)

# Article limits
# Overall lower and upper bounds on the number of articles.
MIN_ARTICLES, MAX_ARTICLES = 20, 50  # Upper bound increased to accommodate more sources
# Per-source cap, adjusted for more sources.
MAX_ARTICLES_PER_SOURCE = 10

# Browser Headers
# Fixed set of HTTP request headers. The User-Agent here is a literal and is
# not affected by the USER_AGENT environment variable.
HEADERS = dict(
    [
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36",
        ),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,"
            "image/webp,*/*;q=0.8",
        ),
        ("Accept-Language", "en-US,en;q=0.5"),
        ("Connection", "keep-alive"),
    ]
)

# Visualization Settings
# Maps a label to a hex colour string.
# NOTE(review): the key here is "emotional", whereas the model and category
# tables in this file use "emotion" — confirm which spelling callers look up.
CHART_COLORS = dict(
    positive="#28a745",
    negative="#dc3545",
    neutral="#6c757d",
    financial="#17a2b8",
    emotional="#ffc107",
    esg="#20c997",
)

# Integer chart sizing values keyed by "width", "height" and "margin".
CHART_DIMENSIONS = dict(width=600, height=300, margin=20)