Update tools/arxiv_tool.py with API specs from Arxiv
tools/arxiv_tool.py  CHANGED  (+44 -20)
@@ -1,4 +1,5 @@
-import arxiv
+import urllib.request
+import xml.etree.ElementTree as ET
 from datetime import datetime, timedelta
 import json
 import os
@@ -13,29 +14,51 @@ class ArxivSearchTool(Tool):
 
     def __call__(self, query: str = "artificial intelligence",
                  max_results: int = 50) -> List[Dict]:
+        """Search ArXiv using their API.
+
+        Args:
+            query: Search query string
+            max_results: Maximum number of results to return
+
+        Returns:
+            List[Dict]: List of paper results with metadata
+        """
         try:
-            # …
-            …
+            # Construct the API URL
+            base_url = 'http://export.arxiv.org/api/query?'
+            query_params = {
+                'search_query': query,
+                'start': 0,
+                'max_results': max_results
+            }
+
+            # Create the full URL
+            url = base_url + urllib.parse.urlencode(query_params)
+
+            # Make the request
+            response = urllib.request.urlopen(url)
+            data = response.read().decode('utf-8')
+
+            # Parse the Atom XML response
+            root = ET.fromstring(data)
 
-            # …
-            …
-            …
-                max_results=max_results,
-                sort_by=arxiv.SortCriterion.SubmittedDate
-            )
+            # Define the Atom namespace
+            ns = {'atom': 'http://www.w3.org/2005/Atom',
+                  'arxiv': 'http://arxiv.org/schemas/atom'}
 
-            # Get results
             results = []
-            for …
+            for entry in root.findall('atom:entry', ns):
+                # Extract paper details
                 result = {
-                    'title': …
-                    'authors': […
-                    …
-                    '…
-                    '…
-                    '…
-                    '…
-                    …
+                    'title': entry.find('atom:title', ns).text.strip(),
+                    'authors': [author.find('atom:name', ns).text
+                                for author in entry.findall('atom:author', ns)],
+                    'summary': entry.find('atom:summary', ns).text.strip() if entry.find('atom:summary', ns) is not None else '',
+                    'published': entry.find('atom:published', ns).text.strip(),
+                    'id': entry.find('atom:id', ns).text.strip(),
+                    'pdf_url': next((link.get('href') for link in entry.findall('atom:link', ns)
+                                     if link.get('type') == 'application/pdf'), None),
+                    'categories': [cat.get('term') for cat in entry.findall('atom:category', ns)]
                 }
                 results.append(result)
 
@@ -80,9 +103,10 @@ def save_daily_papers(output_dir: str = "daily_papers") -> List[Dict]:
         max_results=100
     )
 
+    # Filter for papers published today
    today_papers = [
         paper for paper in papers
-        if paper.get('published')
+        if paper.get('published', '').startswith(today)
    ]
 
    output_file = os.path.join(output_dir, f"ai_papers_{today}.json")
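
For reference, below is a minimal standalone sketch of the request flow the new __call__ implements: build the export.arxiv.org query URL, fetch the Atom feed, and read a few fields per entry. The parameter names (search_query, start, max_results) and the Atom namespace are taken from the diff above; the example query string, result count, and timeout are illustrative choices rather than part of the commit, and running it assumes network access to export.arxiv.org.

import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

# Build the same kind of query URL the tool constructs
params = {'search_query': 'artificial intelligence', 'start': 0, 'max_results': 3}
url = 'http://export.arxiv.org/api/query?' + urllib.parse.urlencode(params)

# Fetch and parse the Atom feed
with urllib.request.urlopen(url, timeout=30) as response:
    root = ET.fromstring(response.read().decode('utf-8'))

ns = {'atom': 'http://www.w3.org/2005/Atom'}
for entry in root.findall('atom:entry', ns):
    title = entry.find('atom:title', ns).text.strip()
    published = entry.find('atom:published', ns).text.strip()  # ISO 8601, e.g. 2024-05-01T17:59:59Z
    print(published, title)

Two small observations on the new code: it calls urllib.parse.urlencode while importing only urllib.request, which works in practice because CPython's urllib.request imports urllib.parse internally, though an explicit import urllib.parse (as in the sketch) is more robust. Likewise, the startswith(today) filter in save_daily_papers matches the Atom published timestamps (ISO 8601, as above) only if today is a YYYY-MM-DD string; its definition is outside the hunks shown here.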