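"""PaperPilot – an intelligent academic navigator.

A Streamlit app that searches arXiv, scores each result with heuristic
trust and relevance metrics, and links out to citation-analysis tools
(Connected Papers, Litmaps, scite).
"""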
import datetime
import random

import arxiv
import requests
import streamlit as st

# -------------------------------
# Helper Functions
# -------------------------------
def get_paper_metadata(arxiv_id):
    """Fetch citation and connected-paper counts used for scoring.

    Both endpoints are unofficial and may be unavailable or rate-limited,
    so failures are swallowed and the defaults are returned.
    """
    metadata = {
        "citations": 0,
        "institution": "Unknown",
        "authors": [],
        "connected_papers": 0,
    }

    # Citation count from scite.ai
    scite_url = f"https://api.scite.ai/v1/papers/arxiv:{arxiv_id}"
    try:
        response = requests.get(scite_url, timeout=10)
        if response.status_code == 200:
            metadata["citations"] = response.json().get("citation_count", 0)
    except requests.RequestException:
        pass

    # Connected-paper count from Connected Papers
    connected_papers_url = f"https://www.connectedpapers.com/api/graph/{arxiv_id}"
    try:
        response = requests.get(connected_papers_url, timeout=10)
        if response.status_code == 200:
            metadata["connected_papers"] = len(response.json().get("nodes", []))
    except requests.RequestException:
        pass

    return metadata
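
# Usage sketch (the arXiv ID and counts below are hypothetical):
#   get_paper_metadata("2301.00001")
#   -> {"citations": 42, "institution": "Unknown", "authors": [], "connected_papers": 18}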

def calculate_trust_score(metadata):
    """Compute a heuristic trust score from citation count and connectedness."""
    trust_score = 50  # Base score

    # Citations factor (max boost 30 points)
    if metadata["citations"] > 100:
        trust_score += 30
    elif metadata["citations"] > 50:
        trust_score += 20
    elif metadata["citations"] > 10:
        trust_score += 10

    # Connected Papers factor (max boost 20 points)
    if metadata["connected_papers"] > 20:
        trust_score += 20
    elif metadata["connected_papers"] > 10:
        trust_score += 10

    return min(trust_score, 100)
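
# Worked example: 120 citations (+30) and 25 connected papers (+20)
# give min(50 + 30 + 20, 100) = 100.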

def calculate_relevance_score(paper, query):
    """Compute relevance score based on keyword matches and recency."""
    relevance_score = 50  # Base score

    # Keyword match factor: +5 per distinct query term found in the title
    query_terms = query.lower().split()
    title_terms = paper['title'].lower().split()
    match_count = len(set(query_terms) & set(title_terms))
    relevance_score += match_count * 5

    # Publication date factor: newer papers score higher
    if isinstance(paper['published'], datetime.datetime):
        years_old = datetime.datetime.now().year - paper['published'].year
        if years_old < 1:
            relevance_score += 15
        elif years_old < 3:
            relevance_score += 10
        elif years_old < 5:
            relevance_score += 5

    return min(relevance_score, 100)
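
# Worked example: a query "quantum computing" whose two terms both appear in
# the title (+10) on a paper published this year (+15) scores 50 + 10 + 15 = 75.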

def retrieve_papers(query, max_results=5):
    """Retrieve academic papers from arXiv and score each one."""
    client = arxiv.Client()  # Search.results() is deprecated in recent arxiv releases
    search = arxiv.Search(query=query, max_results=max_results)
    papers = []
    for result in client.results(search):
        arxiv_id = result.entry_id.split("/")[-1]
        metadata = get_paper_metadata(arxiv_id)
        trust_score = calculate_trust_score(metadata)
        relevance_score = calculate_relevance_score(
            {"title": result.title, "published": result.published}, query
        )
        paper = {
            "title": result.title,
            "summary": result.summary,
            "url": result.pdf_url,
            "doi": f"https://doi.org/10.48550/arXiv.{arxiv_id}",
            "bib_explorer": f"https://arxiv.org/abs/{arxiv_id}",
            # Link to the human-readable graph page rather than the raw JSON API
            "connected_papers": f"https://www.connectedpapers.com/main/{arxiv_id}",
            "litmaps": f"https://app.litmaps.com/preview/{arxiv_id}",
            "scite": f"https://scite.ai/reports/arxiv:{arxiv_id}",
            "authors": [author.name for author in result.authors],
            "published": result.published,
            "trust_score": trust_score,
            "relevance_score": relevance_score,
        }
        papers.append(paper)
    return papers
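
# Usage sketch (query and field access are illustrative):
#   papers = retrieve_papers("graph neural networks", max_results=3)
#   print(papers[0]["title"], papers[0]["trust_score"])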

def random_paper_search():
    """Retrieve papers for a randomly chosen query, without user input."""
    random_queries = ["artificial intelligence", "quantum computing", "neuroscience", "climate change", "robotics"]
    query = random.choice(random_queries)
    return retrieve_papers(query, max_results=random.randint(5, 15))

# -------------------------------
# Streamlit UI
# -------------------------------
st.title("📚 PaperPilot – Intelligent Academic Navigator")

with st.sidebar:
    st.header("🔍 Search Parameters")
    query = st.text_input("Research topic or question:")
    col1, col2 = st.columns([3, 1])
    with col1:
        search_button = st.button("🔍 Find Articles")
    with col2:
        random_button = st.button("🎲 Random Papers")

if search_button and query.strip():
    with st.spinner("Searching arXiv..."):
        papers = retrieve_papers(query)
    if papers:
        st.session_state.papers = papers
    else:
        st.error("No papers found. Try different keywords.")
elif random_button:
    with st.spinner("Fetching random papers..."):
        papers = random_paper_search()
    st.session_state.papers = papers

if 'papers' in st.session_state:
    for idx, paper in enumerate(st.session_state.papers, 1):
        with st.expander(f"{idx}. {paper['title']}"):
            st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
            published = (
                paper['published'].strftime('%Y-%m-%d')
                if isinstance(paper['published'], datetime.datetime)
                else 'Unknown'
            )
            st.markdown(f"**Published:** {published}")
            st.markdown(f"**Trust Score:** {paper['trust_score']} / 100")
            st.markdown(f"**Relevance Score:** {paper['relevance_score']} / 100")
            st.markdown(f"**DOI:** [Link]({paper['doi']})")
            st.markdown(f"**Bibliographic Explorer:** [Link]({paper['bib_explorer']})")
            st.markdown(f"**Connected Papers:** [Link]({paper['connected_papers']})")
            st.markdown(f"**Litmaps:** [Link]({paper['litmaps']})")
            st.markdown(f"**Scite:** [Link]({paper['scite']})")
            st.markdown("**Abstract:**")
            st.write(paper['summary'])