""" | |
Search History and Trends Component | |
This component provides UI for displaying and analyzing search history and trends. | |
""" | |
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
from datetime import datetime, timedelta | |
import asyncio | |
import json | |
from typing import Dict, List, Any, Optional | |
import random | |
from src.api.services.search_history_service import ( | |
get_search_history, | |
get_trending_topics, | |
get_search_trend_analysis, | |
get_popular_searches, | |
add_search_history, | |
save_search, | |
create_saved_search, | |
get_saved_searches | |
) | |
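
# Streamlit renders synchronously, so the async service helpers defined below are
# invoked with asyncio.run() at each call site rather than awaited directly.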

# For demo/placeholder data when database is not populated
def generate_demo_trends():
    """Generate demo trend data"""
    topics = [
        "ransomware", "databreach", "malware", "phishing", "zeroday",
        "darkmarket", "cryptolocker", "anonymity", "botnet", "exploit",
        "vulnerability", "trojan", "blackmarket", "identity", "creditcard",
        "hacking", "ddos", "credentials", "bitcoin", "monero"
    ]
    return [
        {
            "topic": topic,
            "mentions": random.randint(5, 100),
            "growth_rate": random.uniform(0.5, 25.0)
        }
        for topic in random.sample(topics, min(len(topics), 10))
    ]

def generate_demo_search_data(days=30):
    """Generate demo search frequency data"""
    base_date = datetime.now() - timedelta(days=days)
    dates = [base_date + timedelta(days=i) for i in range(days)]
    base_count = 10
    trend = [random.randint(max(0, base_count - 5), base_count + 15) for _ in range(days)]

    # Add a spike for visual interest (keep the index in range for short windows)
    if days > 0:
        spike_day = random.randint(min(5, days - 1), max(min(5, days - 1), days - 5))
        trend[spike_day] = trend[spike_day] * 3

    return [
        {"interval": date, "count": count}
        for date, count in zip(dates, trend)
    ]

def generate_demo_search_categories():
    """Generate demo search categories data"""
    categories = [
        "Marketplace", "Forum", "Data Breach", "Hacking Tools",
        "Credential Dumps", "Crypto", "Scam", "Uncategorized"
    ]
    return [
        {"category": cat, "count": random.randint(10, 100)}
        for cat in categories
    ]

def generate_demo_popular_searches():
    """Generate demo popular searches data"""
    searches = [
        "ransomware as a service", "credit card dumps", "personal data breach",
        "hacking tools", "bank account access", "identity documents", "covid vaccine cards",
        "social security numbers", "corporate credentials", "zero day exploits"
    ]
    return [
        {"query": query, "count": random.randint(5, 50)}
        for query in searches
    ]

async def get_trend_data(days=90, trend_days=7, limit=10):
    """Get trend data from the database"""
    try:
        # Create a session without context manager
        from src.streamlit_database import async_session
        session = async_session()
        try:
            data = await get_search_trend_analysis(
                db=session,
                days=days,
                trend_days=trend_days,
                limit=limit
            )
            await session.commit()
            return data
        except Exception as e:
            await session.rollback()
            raise e
        finally:
            await session.close()
    except Exception as e:
        st.error(f"Error fetching trend data: {e}")
        # Use demo data as fallback
        return {
            "frequency": generate_demo_search_data(days),
            "popular_searches": generate_demo_popular_searches(),
            "trending_topics": generate_demo_trends(),
            "categories": generate_demo_search_categories(),
            "recent_popular": generate_demo_popular_searches(),
            "velocity": random.uniform(-10, 30),
            "total_searches": {
                "total": 1000,
                "recent": 400,
                "previous": 600
            }
        }
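
# The session/commit/rollback/close pattern above is repeated by every data accessor
# in this module. A reusable helper along these lines could factor it out; this is a
# sketch only (it assumes src.streamlit_database.async_session is an async
# sessionmaker) and is not wired into the existing functions below.
from contextlib import asynccontextmanager


@asynccontextmanager
async def db_session():
    """Yield a database session, committing on success and rolling back on error."""
    from src.streamlit_database import async_session
    session = async_session()
    try:
        yield session
        await session.commit()
    except Exception:
        await session.rollback()
        raise
    finally:
        await session.close()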

async def save_search_query(query, user_id=None, category=None, tags=None):
    """Save a search query to the database"""
    try:
        # Create a session without context manager
        from src.streamlit_database import async_session
        session = async_session()
        try:
            search = await add_search_history(
                db=session,
                query=query,
                user_id=user_id,
                category=category,
                tags=tags,
                result_count=random.randint(5, 100)  # Placeholder
            )
            await session.commit()
            return search
        except Exception as e:
            await session.rollback()
            raise e
        finally:
            await session.close()
    except Exception as e:
        st.error(f"Error saving search: {e}")
        return None

async def get_user_searches(user_id=None, limit=50):
    """Get search history for a user"""
    try:
        # Create a session without context manager
        from src.streamlit_database import async_session
        session = async_session()
        try:
            searches = await get_search_history(
                db=session,
                user_id=user_id,
                limit=limit
            )
            await session.commit()
            return searches
        except Exception as e:
            await session.rollback()
            raise e
        finally:
            await session.close()
    except Exception as e:
        st.error(f"Error fetching search history: {e}")
        return []

async def get_user_saved_searches(user_id=None):
    """Get saved searches for a user"""
    try:
        # Create a session without context manager
        from src.streamlit_database import async_session
        session = async_session()
        try:
            searches = await get_saved_searches(
                db=session,
                user_id=user_id
            )
            await session.commit()
            return searches
        except Exception as e:
            await session.rollback()
            raise e
        finally:
            await session.close()
    except Exception as e:
        st.error(f"Error fetching saved searches: {e}")
        return []

async def create_new_saved_search(name, query, user_id=None, frequency=24, category=None):
    """Create a new saved search"""
    try:
        # Create a session without context manager
        from src.streamlit_database import async_session
        session = async_session()
        try:
            saved_search = await create_saved_search(
                db=session,
                name=name,
                query=query,
                user_id=user_id or 1,  # Default user ID
                frequency=frequency,
                category=category
            )
            await session.commit()
            return saved_search
        except Exception as e:
            await session.rollback()
            raise e
        finally:
            await session.close()
    except Exception as e:
        st.error(f"Error creating saved search: {e}")
        return None

def plot_search_trends(frequency_data):
    """Create a plot of search frequency over time"""
    if not frequency_data:
        return None

    df = pd.DataFrame(frequency_data)
    if 'interval' in df.columns:
        df['interval'] = pd.to_datetime(df['interval'])
        fig = px.line(
            df,
            x='interval',
            y='count',
            title='Search Frequency Over Time',
            labels={'interval': 'Date', 'count': 'Number of Searches'},
            template='plotly_dark'
        )
        fig.update_layout(
            xaxis_title="Date",
            yaxis_title="Number of Searches",
            plot_bgcolor='rgba(17, 17, 17, 0.8)',
            paper_bgcolor='rgba(17, 17, 17, 0)',
            font=dict(color='white')
        )
        return fig
    return None

def plot_category_distribution(category_data):
    """Create a plot of search categories distribution"""
    if not category_data:
        return None

    df = pd.DataFrame(category_data)
    fig = px.pie(
        df,
        values='count',
        names='category',
        title='Search Categories Distribution',
        template='plotly_dark',
        hole=0.4
    )
    fig.update_layout(
        plot_bgcolor='rgba(17, 17, 17, 0.8)',
        paper_bgcolor='rgba(17, 17, 17, 0)',
        font=dict(color='white')
    )
    return fig

def plot_trending_topics(trending_data):
    """Create a bar chart of trending topics"""
    if not trending_data:
        return None

    df = pd.DataFrame(trending_data)
    if len(df) == 0:
        return None

    # Sort by growth rate
    df = df.sort_values('growth_rate', ascending=False)
    fig = px.bar(
        df,
        y='topic',
        x='growth_rate',
        title='Trending Topics by Growth Rate',
        labels={'topic': 'Topic', 'growth_rate': 'Growth Rate (%)'},
        orientation='h',
        template='plotly_dark',
        color='growth_rate',
        color_continuous_scale='Viridis'
    )
    fig.update_layout(
        xaxis_title="Growth Rate (%)",
        yaxis_title="Topic",
        plot_bgcolor='rgba(17, 17, 17, 0.8)',
        paper_bgcolor='rgba(17, 17, 17, 0)',
        font=dict(color='white'),
        yaxis={'categoryorder': 'total ascending'}
    )
    return fig

def plot_popular_searches(popular_data):
    """Create a bar chart of popular searches"""
    if not popular_data:
        return None

    df = pd.DataFrame(popular_data)
    if len(df) == 0:
        return None

    df = df.sort_values('count', ascending=True)
    fig = px.bar(
        df,
        y='query',
        x='count',
        title='Most Popular Search Terms',
        labels={'query': 'Search Term', 'count': 'Number of Searches'},
        orientation='h',
        template='plotly_dark'
    )
    fig.update_layout(
        xaxis_title="Number of Searches",
        yaxis_title="Search Term",
        plot_bgcolor='rgba(17, 17, 17, 0.8)',
        paper_bgcolor='rgba(17, 17, 17, 0)',
        font=dict(color='white'),
        yaxis={'categoryorder': 'total ascending'}
    )
    return fig

def render_search_box():
    """Render the search box component"""
    st.markdown("### Search Dark Web Content")

    col1, col2 = st.columns([3, 1])
    with col1:
        search_query = st.text_input("Enter search terms", placeholder="Enter keywords to search dark web content...")
    with col2:
        categories = ["All Categories", "Marketplace", "Forum", "Paste Site", "Data Breach", "Hacking", "Cryptocurrency"]
        selected_category = st.selectbox("Category", categories, index=0)
        if selected_category == "All Categories":
            selected_category = None

    advanced_options = st.expander("Advanced Search Options", expanded=False)
    with advanced_options:
        col1, col2 = st.columns(2)
        with col1:
            date_range = st.selectbox(
                "Date Range",
                ["All Time", "Last 24 Hours", "Last 7 Days", "Last 30 Days", "Last 90 Days", "Custom Range"]
            )
            include_images = st.checkbox("Include Images", value=False)
            include_code = st.checkbox("Include Code Snippets", value=True)
        with col2:
            sources = st.multiselect(
                "Sources",
                ["Dark Forums", "Marketplaces", "Paste Sites", "Leak Sites", "Chat Channels"],
                default=["Dark Forums", "Marketplaces", "Leak Sites"]
            )
            sort_by = st.selectbox(
                "Sort Results By",
                ["Relevance", "Date (Newest First)", "Date (Oldest First)"]
            )

    tags_input = st.text_input("Tags (comma-separated)", placeholder="Add tags to organize your search...")
    search_button = st.button("Search Dark Web")

    if search_button and search_query:
        # Save search to history
        user_id = getattr(st.session_state, "user_id", None)

        # Process tags
        tags = tags_input.strip() if tags_input else None

        # Run the search
        with st.spinner("Searching dark web..."):
            search = asyncio.run(save_search_query(
                query=search_query,
                user_id=user_id,
                category=selected_category,
                tags=tags
            ))

        if search:
            st.success(f"Search completed: Found {search.result_count} results for '{search_query}'")
            # In a real application, we would display results here

            # Offer to save as a monitored search
            save_col1, save_col2 = st.columns([3, 1])
            with save_col1:
                search_name = st.text_input(
                    "Save this search for monitoring (enter a name)",
                    placeholder="My saved search"
                )
            with save_col2:
                frequency = st.selectbox(
                    "Check frequency",
                    ["Manual only", "Daily", "Every 12 hours", "Every 6 hours", "Hourly"],
                    index=1
                )

            # Map to hours
            freq_mapping = {
                "Manual only": 0,
                "Daily": 24,
                "Every 12 hours": 12,
                "Every 6 hours": 6,
                "Hourly": 1
            }
            freq_hours = freq_mapping.get(frequency, 24)

            if st.button("Save for Monitoring"):
                if search_name:
                    saved = asyncio.run(create_new_saved_search(
                        name=search_name,
                        query=search_query,
                        user_id=user_id,
                        frequency=freq_hours,
                        category=selected_category
                    ))
                    if saved:
                        st.success(f"Saved search '{search_name}' created successfully!")
                else:
                    st.error("Please enter a name for your saved search")
        else:
            st.error("Failed to perform search. Please try again.")

def render_search_history():
    """Render the search history component"""
    st.markdown("### Your Search History")

    user_id = getattr(st.session_state, "user_id", None)

    # Fetch search history
    searches = asyncio.run(get_user_searches(user_id))

    if not searches:
        st.info("No search history found. Try searching for dark web content.")
        return

    # Convert to DataFrame for display
    search_data = []
    for search in searches:
        search_data.append({
            "ID": search.id,
            "Query": search.query,
            "Date": search.timestamp,  # keep as datetime so DatetimeColumn can format it
            "Results": search.result_count,
            "Category": search.category or "All",
            "Saved": "✓" if search.is_saved else ""
        })

    df = pd.DataFrame(search_data)

    # Display as table
    st.dataframe(
        df,
        use_container_width=True,
        column_config={
            "ID": st.column_config.NumberColumn(format="%d"),
            "Query": st.column_config.TextColumn(),
            "Date": st.column_config.DatetimeColumn(format="YYYY-MM-DD HH:mm"),
            "Results": st.column_config.NumberColumn(),
            "Category": st.column_config.TextColumn(),
            "Saved": st.column_config.TextColumn()
        }
    )

def render_saved_searches():
    """Render the saved searches component"""
    st.markdown("### Saved Searches")

    user_id = getattr(st.session_state, "user_id", None)

    # Fetch saved searches
    saved_searches = asyncio.run(get_user_saved_searches(user_id))

    if not saved_searches:
        st.info("No saved searches found. Save a search to monitor for new results.")
        return

    # Convert to DataFrame for display
    search_data = []
    for search in saved_searches:
        # Calculate next run time
        if search.last_run_at and search.frequency > 0:
            next_run = search.last_run_at + timedelta(hours=search.frequency)
        else:
            next_run = "Manual only"

        search_data.append({
            "ID": search.id,
            "Name": search.name,
            "Query": search.query,
            "Category": search.category or "All",
            "Frequency": f"{search.frequency}h" if search.frequency > 0 else "Manual",
            "Last Run": search.last_run_at.strftime("%Y-%m-%d %H:%M") if search.last_run_at else "Never",
            "Next Run": next_run if isinstance(next_run, str) else next_run.strftime("%Y-%m-%d %H:%M"),
            "Status": "Active" if search.is_active else "Paused"
        })

    df = pd.DataFrame(search_data)

    # Display as table
    st.dataframe(
        df,
        use_container_width=True
    )

    # Action buttons
    col1, col2, col3 = st.columns(3)
    with col1:
        if st.button("Run Selected Searches Now"):
            st.info("This would trigger manual execution of selected searches")
    with col2:
        if st.button("Pause Selected"):
            st.info("This would pause the selected searches")
    with col3:
        if st.button("Delete Selected"):
            st.info("This would delete the selected searches")

def render_trend_dashboard():
    """Render the trend dashboard component"""
    st.markdown("## Search Trends Analysis")

    # Time period selector
    col1, col2 = st.columns([1, 3])
    with col1:
        time_period = st.selectbox(
            "Time Period",
            ["Last 7 Days", "Last 30 Days", "Last 90 Days", "Last Year"],
            index=1
        )
        # Map to days
        period_mapping = {
            "Last 7 Days": 7,
            "Last 30 Days": 30,
            "Last 90 Days": 90,
            "Last Year": 365
        }
        days = period_mapping.get(time_period, 30)
    with col2:
        st.markdown("")  # Spacing

    # Fetch trend data
    with st.spinner("Loading trend data..."):
        trend_data = asyncio.run(get_trend_data(days=days))

    # Create layout for visualizations
    col1, col2 = st.columns(2)
    with col1:
        search_trend_fig = plot_search_trends(trend_data.get("frequency", []))
        if search_trend_fig:
            st.plotly_chart(search_trend_fig, use_container_width=True)
        else:
            st.error("Failed to load search trend data")

        popular_searches_fig = plot_popular_searches(trend_data.get("popular_searches", []))
        if popular_searches_fig:
            st.plotly_chart(popular_searches_fig, use_container_width=True)
        else:
            st.error("Failed to load popular searches data")
    with col2:
        trending_topics_fig = plot_trending_topics(trend_data.get("trending_topics", []))
        if trending_topics_fig:
            st.plotly_chart(trending_topics_fig, use_container_width=True)
        else:
            st.error("Failed to load trending topics data")

        category_fig = plot_category_distribution(trend_data.get("categories", []))
        if category_fig:
            st.plotly_chart(category_fig, use_container_width=True)
        else:
            st.error("Failed to load category distribution data")

    # Display trend insights
    st.markdown("### Trend Insights")
    col1, col2, col3 = st.columns(3)
    with col1:
        velocity = trend_data.get("velocity", 0)
        velocity_color = "green" if velocity > 0 else "red"
        velocity_icon = "↗️" if velocity > 0 else "↘️"
        st.markdown(f"""
        ### Search Velocity
        <h2 style="color:{velocity_color}">{velocity_icon} {abs(velocity):.1f}%</h2>
        <p>Change in search volume compared to previous period</p>
        """, unsafe_allow_html=True)
    with col2:
        total_searches = trend_data.get("total_searches", {}).get("total", 0)
        st.markdown(f"""
        ### Total Searches
        <h2>{total_searches:,}</h2>
        <p>Total searches in the selected period</p>
        """, unsafe_allow_html=True)
    with col3:
        top_topic = "None"
        top_growth = 0
        if trend_data.get("trending_topics"):
            top_item = max(trend_data["trending_topics"], key=lambda x: x.get("growth_rate", 0))
            top_topic = top_item.get("topic", "None")
            top_growth = top_item.get("growth_rate", 0)
        st.markdown(f"""
        ### Fastest Growing Topic
        <h2>{top_topic}</h2>
        <p>Growth rate: {top_growth:.1f}%</p>
        """, unsafe_allow_html=True)

    # Display emerging themes (if available)
    if trend_data.get("trending_topics"):
        st.markdown("### Emerging Dark Web Themes")

        # Group topics by similar growth rates
        topics = trend_data["trending_topics"]

        # Display as topic clusters with common themes
        theme_groups = {
            "High Growth": [t for t in topics if t.get("growth_rate", 0) > 15],
            "Moderate Growth": [t for t in topics if 5 <= t.get("growth_rate", 0) <= 15],
            "Stable": [t for t in topics if t.get("growth_rate", 0) < 5]
        }

        for theme, items in theme_groups.items():
            if items:
                st.markdown(f"#### {theme}")
                themes_text = ", ".join([f"{t.get('topic')} ({t.get('growth_rate', 0):.1f}%)" for t in items])
                st.markdown(f"<p>{themes_text}</p>", unsafe_allow_html=True)

def render_search_trends():
    """Main function to render the search trends component"""
    st.title("Dark Web Search & Trends")

    tabs = st.tabs([
        "Search Dark Web",
        "Search History",
        "Saved Searches",
        "Trend Analysis"
    ])

    with tabs[0]:
        render_search_box()
    with tabs[1]:
        render_search_history()
    with tabs[2]:
        render_saved_searches()
    with tabs[3]:
        render_trend_dashboard()
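

# A minimal standalone entry point, assuming this module can be launched directly
# with `streamlit run`; in the full application the parent page is expected to
# import and call render_search_trends() instead.
if __name__ == "__main__":
    render_search_trends()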