From d2814bc3e436e5fb48a6ca8cc7fdb2ba182e7d37 Mon Sep 17 00:00:00 2001 From: olaughter Date: Mon, 11 Aug 2025 20:36:34 +0100 Subject: [PATCH] Add ability to pass extra params for Arxiv API Makes the behaviour of Arxiv interactions more customisable by allowing extra query parameters to be set in advance. This is useful when wanting to look at the newest papers or to include a list of specific papers, and could also be used flexibly for other configurations and to support future query parameter options. --- .../tools/arxiv_paper_tool/Examples.md | 4 +++ crewai_tools/tools/arxiv_paper_tool/README.md | 18 ++++++++++- .../arxiv_paper_tool/arxiv_paper_tool.py | 20 ++++++++++-- .../arxiv_paper_tool/arxiv_paper_tool_test.py | 32 +++++++++++++++++++ 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/crewai_tools/tools/arxiv_paper_tool/Examples.md b/crewai_tools/tools/arxiv_paper_tool/Examples.md index 676fa410..e062ef30 100644 --- a/crewai_tools/tools/arxiv_paper_tool/Examples.md +++ b/crewai_tools/tools/arxiv_paper_tool/Examples.md @@ -41,6 +41,10 @@ tool = ArxivPaperTool( download_pdfs=True, save_dir=save_dir, use_title_as_filename=True + extra_params = { + "sortBy": "relevance", + "sortOrder": "descending" + } ) tool.result_as_answer = True #Required,otherwise diff --git a/crewai_tools/tools/arxiv_paper_tool/README.md b/crewai_tools/tools/arxiv_paper_tool/README.md index f9ef56bd..e92cd9b9 100644 --- a/crewai_tools/tools/arxiv_paper_tool/README.md +++ b/crewai_tools/tools/arxiv_paper_tool/README.md @@ -29,6 +29,7 @@ This tool: | `download_pdfs` | `bool` | ❌ | Whether to download the corresponding PDFs. Defaults to `False`. | | `save_dir` | `str` | ❌ | Directory to save PDFs (created if it doesn’t exist). Defaults to `./arxiv_pdfs`. | | `use_title_as_filename` | `bool` | ❌ | Use the paper title as the filename (sanitized). Defaults to `False`. 
| +| `extra_params` | `dict[str, str]` | ❌ | Extend or override the query parameters used for a search; see the options in [Arxiv's API documentation](https://info.arxiv.org/help/api/user-manual.html#311-query-interface). Defaults to `None`. | --- @@ -100,9 +101,24 @@ result = tool._run( print(result) ``` +### Example 5: Order results by the most recently submitted + +```python +tool = ArxivPaperTool( + extra_params={ + "sortBy": "submittedDate", + "sortOrder": "descending" + } +) +result = tool._run( + search_query="explainable ai", +) +print(result) +``` + --- -### Example 5: All Options Combined +### Example 6: All Options Combined ```python tool = ArxivPaperTool( diff --git a/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py b/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py index acd6bbe7..81e4f320 100644 --- a/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py +++ b/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py @@ -29,11 +29,18 @@ class ArxivPaperTool(BaseTool): package_dependencies: List[str] = ["pydantic"] env_vars: List[EnvVar] = [] - def __init__(self, download_pdfs=False, save_dir="./arxiv_pdfs", use_title_as_filename=False): + def __init__( + self, + download_pdfs=False, + save_dir="./arxiv_pdfs", + use_title_as_filename=False, + extra_params=None, + ): super().__init__() self.download_pdfs = download_pdfs self.save_dir = save_dir self.use_title_as_filename = use_title_as_filename + self.extra_params = extra_params def _run(self, search_query: str, max_results: int = 5) -> str: try: @@ -68,7 +75,16 @@ def _run(self, search_query: str, max_results: int = 5) -> str: def fetch_arxiv_data(self, search_query: str, max_results: int) -> List[dict]: - api_url = f"{self.BASE_API_URL}?search_query={urllib.parse.quote(search_query)}&start=0&max_results={max_results}" + params = { + 'search_query': search_query, + 'start': 0, + 'max_results': max_results, + } + if self.extra_params: + params = {**params, **self.extra_params} + + query = 
urllib.parse.urlencode(params) + api_url = f"{self.BASE_API_URL}?{query}" logger.info(f"Fetching data from Arxiv API: {api_url}") try: diff --git a/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py b/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py index 4f8747d2..bf250eb2 100644 --- a/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py +++ b/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py @@ -111,3 +111,35 @@ def test_run_with_max_results(mock_fetch, tool): result = tool._run(search_query="test", max_results=100) assert result.count("Title:") == 100 + + +@patch("urllib.request.urlopen") +def test_fetch_arxiv_data_with_extra_params(mock_urlopen): + mock_response = MagicMock() + mock_response.status = 200 + mock_response.read.return_value = mock_arxiv_response().encode("utf-8") + mock_urlopen.return_value.__enter__.return_value = mock_response + + tool = ArxivPaperTool( + extra_params = { + "sortBy": "lastUpdatedDate", + "sortOrder": "descending", + "start": 10, + } + ) + tool.fetch_arxiv_data("transformer", 1) + + expected_url = "".join( + [ + "http://export.arxiv.org/api/query", + "?search_query=transformer", + "&start=10", + "&max_results=1", + "&sortBy=lastUpdatedDate", + "&sortOrder=descending", + ] + ) + mock_urlopen.assert_called_once_with( + expected_url, + timeout=10, + )