Ferocious0xide committed (verified)
Commit d4c1ac1 · Parent(s): 4481c8f

Create arxiv_tool.py

Files changed (1)
  1. tools/arxiv_tool.py +92 -0
tools/arxiv_tool.py ADDED
@@ -0,0 +1,92 @@
+ import arxiv
+ from datetime import datetime, timedelta
+ import json
+ import os
+ from typing import List, Dict
+ from smolagents import Tool
+
+ class ArxivSearchTool(Tool):
+     name = "search_arxiv"
+     description = "Search ArXiv for papers matching the query"
+     input_types = {"query": str, "max_results": int}
+     output_type = List[Dict]
+
+     def __call__(self, query: str = "artificial intelligence",
+                  max_results: int = 50) -> List[Dict]:
+         try:
+             # Configure the search client
+             client = arxiv.Client()
+
+             # Create the search query
+             search = arxiv.Search(
+                 query=query,
+                 max_results=max_results,
+                 sort_by=arxiv.SortCriterion.SubmittedDate
+             )
+
+             # Get results
+             results = []
+             for paper in client.results(search):
+                 result = {
+                     'title': paper.title,
+                     'authors': [str(author) for author in paper.authors],
+                     'summary': paper.summary,
+                     'published': paper.published.strftime("%Y-%m-%d"),
+                     'pdf_url': paper.pdf_url,
+                     'entry_id': paper.entry_id,
+                     'primary_category': paper.primary_category,
+                     'categories': paper.categories
+                 }
+                 results.append(result)
+
+             return results
+         except Exception as e:
+             return [{"error": f"Error searching ArXiv: {str(e)}"}]
+
+ class LatestPapersTool(Tool):
+     name = "get_latest_papers"
+     description = "Get papers from the last N days from saved results"
+     input_types = {"days_back": int}
+     output_type = List[Dict]
+
+     def __call__(self, days_back: int = 1) -> List[Dict]:
+         papers = []
+         base_dir = "daily_papers"
+
+         # Get dates to check
+         dates = [
+             (datetime.now() - timedelta(days=i)).strftime("%Y-%m-%d")
+             for i in range(days_back)
+         ]
+
+         # Load papers for each date
+         for date in dates:
+             file_path = os.path.join(base_dir, f"ai_papers_{date}.json")
+             if os.path.exists(file_path):
+                 with open(file_path, 'r', encoding='utf-8') as f:
+                     day_papers = json.load(f)
+                     papers.extend(day_papers)
+
+         return papers
+
+ def save_daily_papers(output_dir: str = "daily_papers") -> List[Dict]:
+     """Helper function to save daily papers - not exposed as a tool"""
+     os.makedirs(output_dir, exist_ok=True)
+     today = datetime.now().strftime("%Y-%m-%d")
+
+     arxiv_tool = ArxivSearchTool()
+     papers = arxiv_tool(
+         query='cat:cs.AI OR cat:cs.LG OR cat:cs.CL OR "artificial intelligence"',
+         max_results=100
+     )
+
+     today_papers = [
+         paper for paper in papers
+         if paper.get('published') == today
+     ]
+
+     output_file = os.path.join(output_dir, f"ai_papers_{today}.json")
+     with open(output_file, 'w', encoding='utf-8') as f:
+         json.dump(today_papers, f, indent=2)
+
+     return today_papers
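
Two follow-up notes on the committed file, offered as sketches rather than definitive fixes.

First, the classes above declare `input_types` / `output_type` as Python typing objects and override `__call__` directly. In the smolagents `Tool` interface as I understand it (recent versions), a tool instead declares an `inputs` dict with a type string and description per argument, a string `output_type`, and implements `forward()`; the classes as committed may not pass the base class's attribute validation. A minimal sketch of `ArxivSearchTool` against that interface follows. The attribute names and accepted type strings are assumptions about the installed smolagents version, and the class name `ArxivSearchToolV2` is hypothetical:

    import arxiv
    from smolagents import Tool

    class ArxivSearchToolV2(Tool):
        name = "search_arxiv"
        description = "Search ArXiv for papers matching the query."
        # Assumed interface: per-argument type strings and descriptions,
        # not raw typing objects.
        inputs = {
            "query": {"type": "string", "description": "ArXiv search query"},
            "max_results": {"type": "integer", "description": "Maximum number of papers to return"},
        }
        output_type = "array"

        def forward(self, query: str, max_results: int) -> list:
            client = arxiv.Client()
            search = arxiv.Search(
                query=query,
                max_results=max_results,
                sort_by=arxiv.SortCriterion.SubmittedDate,
            )
            # Return a trimmed dict per paper; extend with the fields used
            # in the committed version as needed.
            return [
                {
                    "title": paper.title,
                    "published": paper.published.strftime("%Y-%m-%d"),
                    "pdf_url": paper.pdf_url,
                }
                for paper in client.results(search)
            ]

Second, the intended flow appears to be: `save_daily_papers()` runs once a day and writes `daily_papers/ai_papers_<YYYY-MM-DD>.json`, and `LatestPapersTool` reads those snapshots back. A minimal usage sketch, assuming the module is importable as `tools.arxiv_tool` and that the tools can be called directly as written:

    from tools.arxiv_tool import ArxivSearchTool, LatestPapersTool, save_daily_papers

    # One-off search (direct call, no agent in the loop).
    papers = ArxivSearchTool()(query="cat:cs.AI", max_results=5)
    print(len(papers), "papers returned")

    # Daily snapshot, e.g. from a cron job, then read the last 3 days back.
    save_daily_papers()
    recent = LatestPapersTool()(days_back=3)
    print(len(recent), "papers loaded from saved snapshots")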