# PaperPilot / app.py
# (Hugging Face Space header artifact: uploaded by flytoe, commit 70b4875 verified)
import os
import streamlit as st
import arxiv
import random
import datetime
import requests
from scholarly import scholarly
# -------------------------------
# Helper Functions
# -------------------------------
def get_paper_metadata(arxiv_id):
    """Fetch citation and connected-paper counts used for scoring.

    Both lookups are best-effort: every network failure, timeout, or
    malformed response leaves the corresponding zero/empty default in
    place rather than crashing the app.

    Args:
        arxiv_id: arXiv identifier string (e.g. "2101.00001v1").

    Returns:
        dict: keys "citations" (int), "institution" (str),
        "authors" (list), "connected_papers" (int).
    """
    metadata = {
        "citations": 0,
        "institution": "Unknown",
        "authors": [],
        "connected_papers": 0
    }
    # Citation count from scite.ai. A timeout is essential here: without
    # one a slow upstream blocks the whole Streamlit script run.
    scite_url = f"https://api.scite.ai/v1/papers/arxiv:{arxiv_id}"
    try:
        response = requests.get(scite_url, timeout=10)
        if response.status_code == 200:
            data = response.json()
            metadata["citations"] = data.get("citation_count", 0)
    except (requests.RequestException, ValueError):
        # ValueError covers a 200 response whose body is not valid JSON.
        pass
    # Connected-paper count from Connected Papers (same best-effort policy).
    connected_papers_url = f"https://www.connectedpapers.com/api/graph/{arxiv_id}"
    try:
        response = requests.get(connected_papers_url, timeout=10)
        if response.status_code == 200:
            data = response.json()
            metadata["connected_papers"] = len(data.get("nodes", []))
    except (requests.RequestException, ValueError):
        pass
    return metadata
def calculate_trust_score(metadata):
    """Score a paper's trustworthiness on a 50-100 scale.

    Starts from a neutral base of 50, adds a tiered citation bonus
    (up to +30) and a tiered Connected Papers graph bonus (up to +20),
    and caps the result at 100.
    """
    score = 50  # neutral base

    # Citation bonus: first matching tier wins, largest threshold first.
    citations = metadata["citations"]
    for threshold, bonus in ((100, 30), (50, 20), (10, 10)):
        if citations > threshold:
            score += bonus
            break

    # Connected Papers graph-size bonus.
    neighbours = metadata["connected_papers"]
    if neighbours > 20:
        score += 20
    elif neighbours > 10:
        score += 10

    return min(score, 100)
def calculate_relevance_score(paper, query):
    """Score how relevant a paper is to the query, on a 50-100 scale.

    Adds +5 for every query word that also appears in the title
    (case-insensitive), plus a recency bonus: +15 if published this
    year, +10 if under three years old, +5 if under five. Capped at 100.
    """
    score = 50  # neutral base

    # Word overlap between the lowercased query and title.
    shared = set(query.lower().split()).intersection(paper['title'].lower().split())
    score += 5 * len(shared)

    # Recency bonus (only when a real datetime is available).
    published = paper['published']
    if isinstance(published, datetime.datetime):
        age_years = datetime.datetime.now().year - published.year
        if age_years < 1:
            score += 15
        elif age_years < 3:
            score += 10
        elif age_years < 5:
            score += 5

    return min(score, 100)
def retrieve_papers(query, max_results=5):
    """Retrieve academic papers from arXiv.

    Args:
        query: Free-text search string passed to the arXiv API.
        max_results: Maximum number of results to return (default 5).

    Returns:
        list[dict]: One dict per paper containing title, summary,
        PDF/DOI/explorer links, authors, published datetime, and the
        computed trust and relevance scores.
    """
    search = arxiv.Search(query=query, max_results=max_results)
    papers = []
    # NOTE(review): Search.results() is deprecated in newer releases of the
    # `arxiv` package in favour of arxiv.Client().results(search) — confirm
    # the pinned package version before upgrading.
    for result in search.results():
        # entry_id looks like "http://arxiv.org/abs/<id>"; the last path
        # segment is the bare arXiv id (may include a version suffix).
        arxiv_id = result.entry_id.split("/")[-1]
        # External lookups (scite.ai, Connected Papers) feed the trust score.
        metadata = get_paper_metadata(arxiv_id)
        trust_score = calculate_trust_score(metadata)
        relevance_score = calculate_relevance_score({"title": result.title, "published": result.published}, query)
        paper = {
            "title": result.title,
            "summary": result.summary,
            "url": result.pdf_url,
            # arXiv DOIs follow the 10.48550/arXiv.<id> scheme.
            "doi": f"https://doi.org/10.48550/arXiv.{arxiv_id}",
            "bib_explorer": f"https://arxiv.org/abs/{arxiv_id}",
            "connected_papers": f"https://www.connectedpapers.com/api/graph/{arxiv_id}",
            "litmaps": f"https://app.litmaps.com/preview/{arxiv_id}",
            "scite": f"https://scite.ai/reports/arxiv:{arxiv_id}",
            "authors": [author.name for author in result.authors],
            "published": result.published,
            "trust_score": trust_score,
            "relevance_score": relevance_score
        }
        papers.append(paper)
    return papers
def random_paper_search():
    """Fetch 5-15 papers for a randomly chosen popular research topic."""
    topics = [
        "artificial intelligence",
        "quantum computing",
        "neuroscience",
        "climate change",
        "robotics",
    ]
    chosen = random.choice(topics)
    return retrieve_papers(chosen, max_results=random.randint(5, 15))
# -------------------------------
# Streamlit UI
# -------------------------------
# Top-level script: Streamlit re-runs this on every widget interaction,
# so results are persisted in st.session_state across reruns.
st.title("πŸ“š PaperPilot – Intelligent Academic Navigator")

# Sidebar holds the query input and the two action buttons.
with st.sidebar:
    st.header("πŸ” Search Parameters")
    query = st.text_input("Research topic or question:")
    col1, col2 = st.columns([3, 1])
    with col1:
        search_button = st.button("πŸš€ Find Articles")
    with col2:
        random_button = st.button("🎲 Random Papers")

# Explicit search takes priority; it only fires with a non-blank query.
if search_button and query.strip():
    with st.spinner("Searching arXiv..."):
        papers = retrieve_papers(query)
        if papers:
            # Persist results so they survive Streamlit reruns.
            st.session_state.papers = papers
        else:
            st.error("No papers found. Try different keywords.")
elif random_button:
    with st.spinner("Fetching random papers..."):
        papers = random_paper_search()
        st.session_state.papers = papers

# Render whatever result set is currently stored (from either button).
if 'papers' in st.session_state:
    for idx, paper in enumerate(st.session_state.papers, 1):
        with st.expander(f"{idx}. {paper['title']}"):
            st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
            # 'published' may be a datetime or a placeholder string.
            st.markdown(f"**Published:** {paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else 'Unknown'}")
            st.markdown(f"**Trust Score:** {paper['trust_score']} / 100")
            st.markdown(f"**Relevance Score:** {paper['relevance_score']} / 100")
            st.markdown(f"**DOI:** [Link]({paper['doi']})")
            st.markdown(f"**Bibliographic Explorer:** [Link]({paper['bib_explorer']})")
            st.markdown(f"**Connected Papers:** [Link]({paper['connected_papers']})")
            st.markdown(f"**Litmaps:** [Link]({paper['litmaps']})")
            st.markdown(f"**Scite:** [Link]({paper['scite']})")
            st.markdown("**Abstract:**")
            st.write(paper['summary'])