File size: 5,880 Bytes
fbec6c3
 
 
a263aa6
d6c42b6
70b4875
 
d6c42b6
70b4875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbec6c3
70b4875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0623585
70b4875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6c42b6
7f6ef04
70b4875
0623585
fbec6c3
 
70b4875
 
 
 
 
fbec6c3
 
 
 
70b4875
 
 
 
 
fbec6c3
7a0e4f2
70b4875
 
fbec6c3
 
 
 
7a0e4f2
70b4875
 
 
 
7a0e4f2
70b4875
 
 
7a0e4f2
7f6ef04
70b4875
 
 
 
 
 
 
 
 
 
 
7a0e4f2
70b4875
7a0e4f2
 
 
 
70b4875
 
 
 
a263aa6
70b4875
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import os
import streamlit as st
import arxiv
import random
import datetime
import requests
from scholarly import scholarly

# -------------------------------
# Helper Functions
# -------------------------------
def get_paper_metadata(arxiv_id):
    """Fetch citation and connected-paper counts used for scoring.

    Args:
        arxiv_id: The arXiv identifier (e.g. "2101.00001v1").

    Returns:
        dict with keys "citations" (int), "institution" (str),
        "authors" (list) and "connected_papers" (int). Values fall
        back to the defaults below when either remote API is
        unreachable, times out, or returns a non-200 / non-JSON body.
    """
    metadata = {
        "citations": 0,
        "institution": "Unknown",
        "authors": [],
        "connected_papers": 0
    }

    # Citation count from scite.ai. A timeout is essential here: this
    # runs inside a Streamlit rerun, and a hung request would freeze
    # the whole UI. Failures are non-fatal — keep the default of 0.
    scite_url = f"https://api.scite.ai/v1/papers/arxiv:{arxiv_id}"
    try:
        response = requests.get(scite_url, timeout=10)
        if response.status_code == 200:
            metadata["citations"] = response.json().get("citation_count", 0)
    except (requests.RequestException, ValueError):
        # Network error or body that isn't valid JSON — best-effort only.
        pass

    # Connected-paper count from Connected Papers, same best-effort policy.
    connected_papers_url = f"https://www.connectedpapers.com/api/graph/{arxiv_id}"
    try:
        response = requests.get(connected_papers_url, timeout=10)
        if response.status_code == 200:
            metadata["connected_papers"] = len(response.json().get("nodes", []))
    except (requests.RequestException, ValueError):
        pass

    return metadata

def calculate_trust_score(metadata):
    """Score a paper's trustworthiness (50–100) from its metadata.

    Starts from a base of 50, adds up to 30 points for citations and
    up to 20 for connected papers, then caps the total at 100.

    Args:
        metadata: dict with "citations" and "connected_papers" ints.

    Returns:
        int trust score in [50, 100].
    """
    score = 50  # every paper starts at the base score

    # (threshold, bonus) tiers, highest first — only the first match applies.
    for threshold, bonus in ((100, 30), (50, 20), (10, 10)):
        if metadata["citations"] > threshold:
            score += bonus
            break

    for threshold, bonus in ((20, 20), (10, 10)):
        if metadata["connected_papers"] > threshold:
            score += bonus
            break

    return min(score, 100)

def calculate_relevance_score(paper, query):
    """Score how relevant a paper is to the query (50–100).

    Adds 5 points per query term found in the title, plus a recency
    bonus for papers published within the last 5 years; capped at 100.

    Args:
        paper: dict with "title" (str) and "published" (datetime or other).
        query: free-text search string.

    Returns:
        int relevance score in [50, 100].
    """
    score = 50  # base score

    # Case-insensitive overlap between query terms and title terms.
    shared = set(query.lower().split()) & set(paper['title'].lower().split())
    score += 5 * len(shared)

    # Recency bonus — only when the published field is a real datetime.
    published = paper['published']
    if isinstance(published, datetime.datetime):
        age = datetime.datetime.now().year - published.year
        for limit, bonus in ((1, 15), (3, 10), (5, 5)):
            if age < limit:
                score += bonus
                break

    return min(score, 100)

def retrieve_papers(query, max_results=5):
    """Search arXiv and return a list of scored paper dicts.

    Each dict carries title/summary/authors/date plus derived links
    (DOI, Bibliographic Explorer, Connected Papers, Litmaps, Scite)
    and the computed trust and relevance scores.

    Args:
        query: arXiv search string.
        max_results: maximum number of papers to return.

    Returns:
        list of paper dicts (possibly empty).
    """
    collected = []
    for entry in arxiv.Search(query=query, max_results=max_results).results():
        # The arXiv id is the last path segment of the entry URL.
        arxiv_id = entry.entry_id.split("/")[-1]
        meta = get_paper_metadata(arxiv_id)
        collected.append({
            "title": entry.title,
            "summary": entry.summary,
            "url": entry.pdf_url,
            "doi": f"https://doi.org/10.48550/arXiv.{arxiv_id}",
            "bib_explorer": f"https://arxiv.org/abs/{arxiv_id}",
            "connected_papers": f"https://www.connectedpapers.com/api/graph/{arxiv_id}",
            "litmaps": f"https://app.litmaps.com/preview/{arxiv_id}",
            "scite": f"https://scite.ai/reports/arxiv:{arxiv_id}",
            "authors": [a.name for a in entry.authors],
            "published": entry.published,
            "trust_score": calculate_trust_score(meta),
            "relevance_score": calculate_relevance_score(
                {"title": entry.title, "published": entry.published}, query
            ),
        })
    return collected

def random_paper_search():
    """Fetch a random batch of papers for one of a fixed set of topics.

    Picks a topic at random and retrieves between 5 and 15 papers for it.

    Returns:
        list of paper dicts from retrieve_papers().
    """
    topics = (
        "artificial intelligence",
        "quantum computing",
        "neuroscience",
        "climate change",
        "robotics",
    )
    return retrieve_papers(random.choice(topics), max_results=random.randint(5, 15))

# -------------------------------
# Streamlit UI
# -------------------------------
# NOTE: this is a top-level Streamlit script — it re-executes from the top
# on every user interaction, so widget creation order defines the layout.
st.title("πŸ“š PaperPilot – Intelligent Academic Navigator")

with st.sidebar:
    st.header("πŸ” Search Parameters")
    query = st.text_input("Research topic or question:")

    # Two side-by-side buttons: keyword search (wide) and random shortcut.
    col1, col2 = st.columns([3, 1])
    with col1:
        search_button = st.button("πŸš€ Find Articles")
    with col2:
        random_button = st.button("🎲 Random Papers")

    # Only search when the button was clicked AND the query is non-blank.
    if search_button and query.strip():
        with st.spinner("Searching arXiv..."):
            papers = retrieve_papers(query)
            if papers:
                # Stash results in session_state so they survive reruns.
                st.session_state.papers = papers
            else:
                st.error("No papers found. Try different keywords.")
    elif random_button:
        with st.spinner("Fetching random papers..."):
            papers = random_paper_search()
            st.session_state.papers = papers

# Render whatever result set is cached from a previous (or this) rerun.
if 'papers' in st.session_state:
    for idx, paper in enumerate(st.session_state.papers, 1):
        with st.expander(f"{idx}. {paper['title']}"):
            st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
            # 'published' may not be a datetime (see calculate_relevance_score).
            st.markdown(f"**Published:** {paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else 'Unknown'}")
            st.markdown(f"**Trust Score:** {paper['trust_score']} / 100")
            st.markdown(f"**Relevance Score:** {paper['relevance_score']} / 100")
            st.markdown(f"**DOI:** [Link]({paper['doi']})")
            st.markdown(f"**Bibliographic Explorer:** [Link]({paper['bib_explorer']})")
            st.markdown(f"**Connected Papers:** [Link]({paper['connected_papers']})")
            st.markdown(f"**Litmaps:** [Link]({paper['litmaps']})")
            st.markdown(f"**Scite:** [Link]({paper['scite']})")
            st.markdown("**Abstract:**")
            st.write(paper['summary'])