import os
import random
import datetime

import requests
import streamlit as st
import arxiv
from scholarly import scholarly

# -------------------------------
# Helper Functions
# -------------------------------

# Per-request timeout (seconds) so a slow external API cannot hang the UI.
REQUEST_TIMEOUT = 10


def get_paper_metadata(arxiv_id):
    """Fetch metadata like citations and connected papers for scoring.

    Both lookups are best-effort: a network failure, non-200 status, or
    malformed JSON leaves the corresponding default value in place
    instead of crashing the app.

    Args:
        arxiv_id: arXiv identifier string (e.g. "2101.00001v1").

    Returns:
        dict with keys "citations" (int), "institution" (str),
        "authors" (list), and "connected_papers" (int).
    """
    metadata = {
        "citations": 0,
        "institution": "Unknown",
        "authors": [],
        "connected_papers": 0,
    }

    # Fetch citation count from scite.ai.
    scite_url = f"https://api.scite.ai/v1/papers/arxiv:{arxiv_id}"
    try:
        response = requests.get(scite_url, timeout=REQUEST_TIMEOUT)
        if response.status_code == 200:
            data = response.json()
            metadata["citations"] = data.get("citation_count", 0)
    except (requests.RequestException, ValueError):
        pass  # best-effort: keep the default of 0 on network/JSON errors

    # Fetch connected paper count from Connected Papers.
    connected_papers_url = f"https://www.connectedpapers.com/api/graph/{arxiv_id}"
    try:
        response = requests.get(connected_papers_url, timeout=REQUEST_TIMEOUT)
        if response.status_code == 200:
            data = response.json()
            metadata["connected_papers"] = len(data.get("nodes", []))
    except (requests.RequestException, ValueError):
        pass  # best-effort: keep the default of 0

    return metadata


def calculate_trust_score(metadata):
    """Compute trust score based on citations and author credibility.

    Starts from a base of 50, adds up to 30 points for citation count
    and up to 20 for connected-papers graph size, capped at 100.

    Args:
        metadata: dict as returned by ``get_paper_metadata``.

    Returns:
        int in [50, 100].
    """
    trust_score = 50  # Base score

    # Citations factor (max boost 30 points)
    if metadata["citations"] > 100:
        trust_score += 30
    elif metadata["citations"] > 50:
        trust_score += 20
    elif metadata["citations"] > 10:
        trust_score += 10

    # Connected Papers factor (max boost 20 points)
    if metadata["connected_papers"] > 20:
        trust_score += 20
    elif metadata["connected_papers"] > 10:
        trust_score += 10

    return min(trust_score, 100)


def calculate_relevance_score(paper, query):
    """Compute relevance score based on keyword match and recency.

    Adds 5 points per distinct query term that appears in the title,
    plus a recency bonus (15/10/5 points for papers under 1/3/5 years
    old), capped at 100.

    Args:
        paper: dict with at least "title" (str) and "published"
            (datetime.datetime or other; non-datetime gets no bonus).
        query: the user's search string.

    Returns:
        int in [50, 100].
    """
    relevance_score = 50  # Base score

    # Keyword match factor
    query_terms = query.lower().split()
    title_terms = paper['title'].lower().split()
    match_count = len(set(query_terms) & set(title_terms))
    relevance_score += match_count * 5

    # Publication date factor (year-granularity recency bonus)
    if isinstance(paper['published'], datetime.datetime):
        years_old = datetime.datetime.now().year - paper['published'].year
        if years_old < 1:
            relevance_score += 15
        elif years_old < 3:
            relevance_score += 10
        elif years_old < 5:
            relevance_score += 5

    return min(relevance_score, 100)


def retrieve_papers(query, max_results=5):
    """Retrieve academic papers from arXiv.

    Each result is enriched with trust/relevance scores and a set of
    exploration links (DOI, Bibliographic Explorer, Connected Papers,
    Litmaps, scite).

    Args:
        query: search string passed to arXiv.
        max_results: maximum number of papers to fetch.

    Returns:
        list of paper dicts (possibly empty).
    """
    search = arxiv.Search(query=query, max_results=max_results)
    papers = []
    for result in search.results():
        # entry_id looks like "http://arxiv.org/abs/<id>"; keep the tail.
        arxiv_id = result.entry_id.split("/")[-1]
        metadata = get_paper_metadata(arxiv_id)
        trust_score = calculate_trust_score(metadata)
        relevance_score = calculate_relevance_score(
            {"title": result.title, "published": result.published}, query
        )
        paper = {
            "title": result.title,
            "summary": result.summary,
            "url": result.pdf_url,
            "doi": f"https://doi.org/10.48550/arXiv.{arxiv_id}",
            "bib_explorer": f"https://arxiv.org/abs/{arxiv_id}",
            "connected_papers": f"https://www.connectedpapers.com/api/graph/{arxiv_id}",
            "litmaps": f"https://app.litmaps.com/preview/{arxiv_id}",
            "scite": f"https://scite.ai/reports/arxiv:{arxiv_id}",
            "authors": [author.name for author in result.authors],
            "published": result.published,
            "trust_score": trust_score,
            "relevance_score": relevance_score,
        }
        papers.append(paper)
    return papers


def random_paper_search():
    """Retrieve random papers without user input.

    Picks one of a fixed set of topics and fetches 5-15 papers for it.
    """
    random_queries = [
        "artificial intelligence",
        "quantum computing",
        "neuroscience",
        "climate change",
        "robotics",
    ]
    query = random.choice(random_queries)
    return retrieve_papers(query, max_results=random.randint(5, 15))


# -------------------------------
# Streamlit UI
# -------------------------------
st.title("📚 PaperPilot – Intelligent Academic Navigator")

with st.sidebar:
    st.header("🔍 Search Parameters")
    query = st.text_input("Research topic or question:")
    col1, col2 = st.columns([3, 1])
    with col1:
        search_button = st.button("🚀 Find Articles")
    with col2:
        random_button = st.button("🎲 Random Papers")

if search_button and query.strip():
    with st.spinner("Searching arXiv..."):
        papers = retrieve_papers(query)
        if papers:
            st.session_state.papers = papers
        else:
            st.error("No papers found. Try different keywords.")
elif random_button:
    with st.spinner("Fetching random papers..."):
        papers = random_paper_search()
        # Mirror the search branch: only store non-empty results.
        if papers:
            st.session_state.papers = papers
        else:
            st.error("No papers found. Try different keywords.")

# Results persist in session_state so they survive Streamlit reruns.
if 'papers' in st.session_state:
    for idx, paper in enumerate(st.session_state.papers, 1):
        with st.expander(f"{idx}. {paper['title']}"):
            st.markdown(f"**Authors:** {', '.join(paper['authors'])}")
            st.markdown(f"**Published:** {paper['published'].strftime('%Y-%m-%d') if isinstance(paper['published'], datetime.datetime) else 'Unknown'}")
            st.markdown(f"**Trust Score:** {paper['trust_score']} / 100")
            st.markdown(f"**Relevance Score:** {paper['relevance_score']} / 100")
            st.markdown(f"**DOI:** [Link]({paper['doi']})")
            st.markdown(f"**Bibliographic Explorer:** [Link]({paper['bib_explorer']})")
            st.markdown(f"**Connected Papers:** [Link]({paper['connected_papers']})")
            st.markdown(f"**Litmaps:** [Link]({paper['litmaps']})")
            st.markdown(f"**Scite:** [Link]({paper['scite']})")
            st.markdown("**Abstract:**")
            st.write(paper['summary'])