# Page-capture residue, not part of the program: "Spaces: Sleeping Sleeping"
import os | |
import streamlit as st | |
import arxiv | |
import random | |
import datetime | |
import requests | |
from scholarly import scholarly | |
# -------------------------------
# Helper Functions
# -------------------------------
def _fetch_json_object(url, timeout=10):
    """Return the JSON object served at *url*, or None if it cannot be obtained.

    None is returned for connection errors and timeouts, for any status code
    other than 200, for bodies that are not valid JSON, and for JSON payloads
    that are not objects (dicts).
    """
    try:
        # A timeout is required: without one a stalled server blocks the
        # whole app, because requests waits indefinitely by default.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        payload = response.json()
    except ValueError:
        # Covers JSON decode failures (e.g. an HTML error page with status 200).
        return None
    return payload if isinstance(payload, dict) else None


def get_paper_metadata(arxiv_id):
    """Fetch citation and connected-paper counts used for scoring.

    Both lookups are best-effort: when a service is unreachable, slow, or
    returns something unusable, the corresponding default is kept so that one
    failing lookup does not abort the search.

    Args:
        arxiv_id: arXiv identifier interpolated into both request URLs.

    Returns:
        A dict with the keys "citations", "institution", "authors" and
        "connected_papers". Only "citations" and "connected_papers" are
        updated here; "institution" and "authors" keep their defaults
        ("Unknown" and an empty list).
    """
    metadata = {
        "citations": 0,
        "institution": "Unknown",
        "authors": [],
        "connected_papers": 0,
    }

    # Fetch citation count from scite.ai
    scite_url = f"https://api.scite.ai/v1/papers/arxiv:{arxiv_id}"
    scite_data = _fetch_json_object(scite_url)
    if scite_data is not None:
        metadata["citations"] = scite_data.get("citation_count", 0)

    # Fetch connected paper count from Connected Papers
    connected_papers_url = f"https://www.connectedpapers.com/api/graph/{arxiv_id}"
    graph_data = _fetch_json_object(connected_papers_url)
    if graph_data is not None:
        metadata["connected_papers"] = len(graph_data.get("nodes", []))

    return metadata
def calculate_trust_score(metadata):
    """Compute a 0-100 trust score from citation and connected-paper counts.

    Starts from a base of 50, adds up to 30 points for citations and up to
    20 points for connected papers, and caps the total at 100.

    Args:
        metadata: mapping with numeric "citations" and "connected_papers"
            entries.

    Returns:
        The trust score as an int, at most 100.
    """
    # (exclusive lower bound, bonus) pairs, checked from the highest tier down.
    citation_tiers = ((100, 30), (50, 20), (10, 10))
    connection_tiers = ((20, 20), (10, 10))

    citation_count = metadata["citations"]
    citation_bonus = next(
        (bonus for floor, bonus in citation_tiers if citation_count > floor), 0
    )

    connection_count = metadata["connected_papers"]
    connection_bonus = next(
        (bonus for floor, bonus in connection_tiers if connection_count > floor), 0
    )

    base_score = 50
    return min(base_score + citation_bonus + connection_bonus, 100)
def _word_set(text):
    """Return the set of lower-cased alphanumeric words found in *text*."""
    # Replace every non-alphanumeric character with a space so punctuation
    # attached to a word ("Transformers:", "(survey)") does not block a match.
    normalized = "".join(ch if ch.isalnum() else " " for ch in text.lower())
    return set(normalized.split())


def calculate_relevance_score(paper, query):
    """Compute a 0-100 relevance score from keyword overlap and recency.

    Starts from a base of 50, adds 5 points per distinct query word that also
    appears in the title, adds a recency bonus (15 / 10 / 5 points for papers
    younger than 1 / 3 / 5 years), and caps the total at 100.

    Args:
        paper: mapping with a "title" string and a "published" value. The
            recency bonus is applied only when "published" is a
            ``datetime.datetime``; any other value is ignored.
        query: the user's search text.

    Returns:
        The relevance score, at most 100.
    """
    relevance_score = 50  # Base score

    # Keyword match factor
    match_count = len(_word_set(query) & _word_set(paper['title']))
    relevance_score += match_count * 5

    # Publication date factor
    published = paper['published']
    if isinstance(published, datetime.datetime):
        # Use the publication timestamp's own tzinfo so aware and naive
        # datetimes are never mixed in the subtraction.
        now = datetime.datetime.now(tz=published.tzinfo)
        # Measure real elapsed time rather than a calendar-year difference,
        # so a paper from last December is not "one year old" in January.
        age_years = (now - published).days / 365.25
        if age_years < 1:
            relevance_score += 15
        elif age_years < 3:
            relevance_score += 10
        elif age_years < 5:
            relevance_score += 5

    return min(relevance_score, 100)
def _versionless_arxiv_id(arxiv_id):
    """Strip a trailing version suffix ("v1", "v12", ...) from an arXiv ID."""
    base, sep, version = arxiv_id.rpartition("v")
    # Only strip when what follows the last "v" is purely digits; archive
    # names such as "solv-int" contain a "v" that must be left alone.
    if sep and base and version.isdigit():
        return base
    return arxiv_id


def retrieve_papers(query, max_results=5):
    """Retrieve academic papers from arXiv and attach trust/relevance scores.

    Args:
        query: search text passed to arXiv and used for relevance scoring.
        max_results: maximum number of results requested from arXiv.

    Returns:
        A list of dicts, one per result, with the keys "title", "summary",
        "url", "doi", "bib_explorer", "connected_papers", "litmaps", "scite",
        "authors", "published", "trust_score" and "relevance_score".
    """
    search = arxiv.Search(query=query, max_results=max_results)
    papers = []
    # Client.results() is the supported entry point; Search.results() is
    # deprecated in current releases of the arxiv package.
    for result in arxiv.Client().results(search):
        # get_short_id() keeps the archive prefix of old-style identifiers
        # (e.g. "hep-th/9901001v1"), which splitting entry_id on "/" loses.
        arxiv_id = result.get_short_id()
        metadata = get_paper_metadata(arxiv_id)
        trust_score = calculate_trust_score(metadata)
        relevance_score = calculate_relevance_score(
            {"title": result.title, "published": result.published}, query
        )
        paper = {
            "title": result.title,
            "summary": result.summary,
            "url": result.pdf_url,
            # arXiv DOIs identify the article, not a version, so the version
            # suffix must not be part of the DOI.
            "doi": f"https://doi.org/10.48550/arXiv.{_versionless_arxiv_id(arxiv_id)}",
            "bib_explorer": f"https://arxiv.org/abs/{arxiv_id}",
            "connected_papers": f"https://www.connectedpapers.com/api/graph/{arxiv_id}",
            "litmaps": f"https://app.litmaps.com/preview/{arxiv_id}",
            "scite": f"https://scite.ai/reports/arxiv:{arxiv_id}",
            "authors": [author.name for author in result.authors],
            "published": result.published,
            "trust_score": trust_score,
            "relevance_score": relevance_score,
        }
        papers.append(paper)
    return papers
def random_paper_search():
    """Fetch papers for a randomly picked topic, without any user input.

    One of five built-in topics is chosen at random, and between 5 and 15
    results (inclusive) are requested for it via ``retrieve_papers``.

    Returns:
        Whatever ``retrieve_papers`` returns for the chosen topic.
    """
    topic_pool = [
        "artificial intelligence",
        "quantum computing",
        "neuroscience",
        "climate change",
        "robotics",
    ]
    chosen_topic = random.choice(topic_pool)
    result_limit = random.randint(5, 15)
    return retrieve_papers(chosen_topic, max_results=result_limit)
# -------------------------------
# Streamlit UI
# -------------------------------
def _render_paper(idx, paper):
    """Render one paper (scores, links and abstract) inside an expander."""
    with st.expander(f"{idx}. {paper['title']}"):
        st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
        st.markdown(f"**Published:** {paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else 'Unknown'}")
        st.markdown(f"**Trust Score:** {paper['trust_score']} / 100")
        st.markdown(f"**Relevance Score:** {paper['relevance_score']} / 100")
        st.markdown(f"**DOI:** [Link]({paper['doi']})")
        st.markdown(f"**Bibliographic Explorer:** [Link]({paper['bib_explorer']})")
        st.markdown(f"**Connected Papers:** [Link]({paper['connected_papers']})")
        st.markdown(f"**Litmaps:** [Link]({paper['litmaps']})")
        st.markdown(f"**Scite:** [Link]({paper['scite']})")
        st.markdown("**Abstract:**")
        st.write(paper['summary'])


# NOTE(review): the leading "π" / "π²" and the "β" in the UI strings below
# look like mis-decoded emoji and a dash. They are left byte-identical here;
# confirm the intended characters against the original file.
# NOTE(review): the nesting below was reconstructed from statement order (the
# captured source had no indentation) -- confirm the buttons belong in the
# sidebar.
st.title("π PaperPilot β Intelligent Academic Navigator")

with st.sidebar:
    st.header("π Search Parameters")
    query = st.text_input("Research topic or question:")
    col1, col2 = st.columns([3, 1])
    with col1:
        search_button = st.button("π Find Articles")
    with col2:
        random_button = st.button("π² Random Papers")

if search_button and query.strip():
    with st.spinner("Searching arXiv..."):
        papers = retrieve_papers(query)
    if papers:
        # Keep results in session state so they survive Streamlit reruns.
        st.session_state.papers = papers
    else:
        st.error("No papers found. Try different keywords.")
elif search_button:
    # A blank query used to be ignored silently; tell the user what is missing.
    st.warning("Please enter a research topic or question before searching.")
elif random_button:
    with st.spinner("Fetching random papers..."):
        papers = random_paper_search()
    st.session_state.papers = papers

if 'papers' in st.session_state:
    for idx, paper in enumerate(st.session_state.papers, 1):
        _render_paper(idx, paper)