From fd371cff44f31f26e9c15bdf3cb9c166a3f8127b Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Wed, 7 May 2025 18:19:02 +0200
Subject: [PATCH 1/9] Implement ScienceDirectSearch using the PUT method

---
 .../sciencedirect/ScienceDirectSearch.rst     | 163 ++++++++++-------
 .../sciencedirect/sciencedirect_search.py     | 165 +++++++----------
 .../tests/test_ScienceDirectSearch.py         | 100 ++++++-----
 pybliometrics/superclasses/base.py            | 115 +++++++-----
 pybliometrics/superclasses/search.py          |  28 ++-
 pybliometrics/utils/constants.py              |   1 +
 pybliometrics/utils/get_content.py            | 167 +++++++++---------
 pybliometrics/utils/parse_content.py          |  14 +-
 8 files changed, 405 insertions(+), 348 deletions(-)

diff --git a/docs/reference/sciencedirect/ScienceDirectSearch.rst b/docs/reference/sciencedirect/ScienceDirectSearch.rst
index 955daaf8..bc2d77ce 100644
--- a/docs/reference/sciencedirect/ScienceDirectSearch.rst
+++ b/docs/reference/sciencedirect/ScienceDirectSearch.rst
@@ -1,9 +1,33 @@
 pybliometrics.sciencedirect.ScienceDirectSearch
-=================================================
-
-`ScopusSearch()` implements the `ScienceDirect Search API <https://nonprod-devportal.elsevier.com/documentation/ScienceDirectSearchAPI.wadl>`_.  It executes a query to search for documents and retrieves the resulting records.
-Any query that works in the `Advanced Document Search on sciencedirect.com <https://www.sciencedirect.com/search/entry>`_ will work.
-For a complete guide on how to query check the `documentation <https://service.elsevier.com/app/answers/detail/a_id/25974/supporthub/sciencedirect/>`_.
+===============================================
+
+`ScienceDirectSearch()` implements the `ScienceDirect Search API <https://nonprod-devportal.elsevier.com/documentation/ScienceDirectSearchAPI.wadl>`_ using the `PUT method`.  It executes a query to search for documents and retrieves the resulting records.
+The class takes a `query` dictionary as input which has to follow this schema:
+
+.. code-block:: text
+
+    {
+        authors: string,
+        date: string,
+        display: {
+            highlights: boolean,
+            offset: integer,
+            show: integer,
+            sortBy: string
+        },
+        filters: {
+            openAccess: boolean
+        },
+        issue: string,
+        loadedAfter: string,
+        page: string,
+        pub: string,
+        qs: string,
+        title: string,
+        volume: string
+    }
+
+For a more detailed description of the parameters, please refer to the `ScienceDirect Search API migration documentation <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`_.
 
 .. currentmodule:: pybliometrics.sciencedirect
 .. contents:: Table of Contents
@@ -19,16 +43,29 @@ Documentation
 Examples
 --------
 
-The class is initialized with a search query. To see the download progress, set `verbose=True`.
+The class is initialized with a search query.
+We can pass the field `qs` to search for specific keywords.
+Using `verbose=True` will print the progress of the download.
 
 .. code-block:: python
 
     >>> from pybliometrics.sciencedirect import ScienceDirectSearch, init
     >>> init()
     >>> # Retrieve documents based on the search query  
-    >>> sds = ScienceDirectSearch('"neural radiance fields" AND "3D" AND YEAR(2024)', verbose=True)
-    Downloading results for query ""neural radiance fields" AND "3D" AND YEAR(2024)":
-    100%|██████████| 8/8 [00:05<00:00,  1.39it/s]
+    >>> query = {'qs': '"neural radiance fields" AND "3D rendering"', 'date': '2024'}
+    >>> sds = ScienceDirectSearch(query, verbose=True)
+    Downloading results for query "{'qs': '"neural radiance fields" AND "3D rendering"', 'date': '2024', 'display': {'offset': 0, 'show': 100, 'sortBy': 'date'}, 'cursor': '*'}":
+    100%|██████████| 1/1 [00:00<00:00,  3.23it/s]
+
+
+To check the number of results, use the method `get_results_size()`.
+
+.. code-block:: python
+
+    >>> # Check the number of results
+    >>> sds.get_results_size()
+    10
+
 
 To access the results, use the attribute `results` which contains a list of `Document` namedtuples.
 
@@ -36,9 +73,9 @@ To access the results, use the attribute `results` which contains a list of `Doc
 
     >>> # Access the results
     >>> results = sds.results
-    [Document(authors='Dong He;Wenhua Qian;Jinde Cao', first_author='Dong He', doi='10.1016/j.cag.2025.104181', title='GEAST-RF: Geometry Enhanced 3D Arbitrary Style Transfer Via Neural Radiance Fields', link='https://www.sciencedirect.com/science/article/pii/S0097849325000202?dgcid=api_sd_search-api-endpoint', load_date='2025-02-16T00:00:00.000Z', openaccess_status=False, pii='S0097849325000202', coverDate='2025-02-16', endingPage=None, publicationName='Computers & Graphics', startingPage='104181', api_link='https://api.elsevier.com/content/article/pii/S0097849325000202', volume=None),
-     Document(authors='Qicheng Xu;Min Hu;Xitao Zhang', first_author='Qicheng Xu', doi='10.1016/j.asr.2025.01.065', title='A neural radiance fields method for 3D reconstruction of space target', link='https://www.sciencedirect.com/science/article/pii/S0273117725000973?dgcid=api_sd_search-api-endpoint', load_date='2025-02-01T00:00:00.000Z', openaccess_status=False, pii='S0273117725000973', coverDate='2025-02-01', endingPage=None, publicationName='Advances in Space Research', startingPage=None, api_link='https://api.elsevier.com/content/article/pii/S0273117725000973', volume=None),
-     Document(authors='Jian Liu;Zhen Yu', first_author='Jian Liu', doi='10.1016/j.neucom.2025.129420', title='SA3D-L: A lightweight model for 3D object segmentation using neural radiance fields', link='https://www.sciencedirect.com/science/article/pii/S092523122500092X?dgcid=api_sd_search-api-endpoint', load_date='2025-01-14T00:00:00.000Z', openaccess_status=False, pii='S092523122500092X', coverDate='2025-03-28', endingPage=None, publicationName='Neurocomputing', startingPage='129420', api_link='https://api.elsevier.com/content/article/pii/S092523122500092X', volume='623'),
+    [Document(authors='Geontae Kim; Youngjin Cha', doi='10.1016/j.autcon.2024.105878', loadDate='2024-11-19T00:00:00.000Z', openAccess=True, first_page=105878, last_page=None, pii='S0926580524006149', publicationDate='2024-12-15', sourceTitle='Automation in Construction', title='3D Pixelwise damage mapping using a deep attention based modified Nerfacto', uri='https://www.sciencedirect.com/science/article/pii/S0926580524006149?dgcid=api_sd_search-api-endpoint', volumeIssue='Volume 168, Part B'),
+     Document(authors='Akram Akbar; Chun Liu; Zeran Xu', doi='10.1016/j.aei.2024.102913', loadDate='2024-11-16T00:00:00.000Z', openAccess=False, first_page=102913, last_page=None, pii='S1474034624005640', publicationDate='2024-10-31', sourceTitle='Advanced Engineering Informatics', title='Scene information guided aerial photogrammetric mission recomposition towards detailed level building reconstruction', uri='https://www.sciencedirect.com/science/article/pii/S1474034624005640?dgcid=api_sd_search-api-endpoint', volumeIssue='Volume 62, Part D'),
+     Document(authors='Ruxandra Stoean; Nebojsa Bacanin; Leonard Ionescu', doi='10.1016/j.culher.2024.07.008', loadDate='2024-08-09T00:00:00.000Z', openAccess=False, first_page=18, last_page=26, pii='S1296207424001468', publicationDate='2024-10-31', sourceTitle='Journal of Cultural Heritage', title='Bridging the past and present: AI-driven 3D restoration of degraded artefacts for museum digital display', uri='https://www.sciencedirect.com/science/article/pii/S1296207424001468?dgcid=api_sd_search-api-endpoint', volumeIssue='Volume 69'),
      ...]
 
 The list of results can be cast into a Pandas DataFrame.
@@ -50,11 +87,12 @@ The list of results can be cast into a Pandas DataFrame.
     >>> df = pd.DataFrame(sds.results)
     >>> # Display available fields
     >>> df.columns
-    Index(['eid', 'filename', 'height', 'mimetype', 'ref', 'size', 'type', 'url',
-       'width'],
+    Index(['authors', 'doi', 'loadDate', 'openAccess', 'first_page', 'last_page',
+       'pii', 'publicationDate', 'sourceTitle', 'title', 'uri', 'volumeIssue'],
       dtype='object')
     >>> # Get shape of the DataFrame (rows x columns)
-    (200, 14)
+    >>> df.shape
+    (10, 12)
     >>> # Display the first 3 rows
     >>> df.head(3)
 
@@ -82,74 +120,65 @@ The list of results can be cast into a Pandas DataFrame.
         <tr style="text-align: right;">
         <th></th>
         <th>authors</th>
-        <th>first_author</th>
         <th>doi</th>
-        <th>title</th>
-        <th>link</th>
-        <th>load_date</th>
-        <th>openaccess_status</th>
+        <th>loadDate</th>
+        <th>openAccess</th>
+        <th>first_page</th>
+        <th>last_page</th>
         <th>pii</th>
-        <th>coverDate</th>
-        <th>endingPage</th>
-        <th>publicationName</th>
-        <th>startingPage</th>
-        <th>api_link</th>
-        <th>volume</th>
+        <th>publicationDate</th>
+        <th>sourceTitle</th>
+        <th>title</th>
+        <th>uri</th>
+        <th>volumeIssue</th>
         </tr>
     </thead>
     <tbody>
         <tr>
         <th>0</th>
-        <td>Dong He;Wenhua Qian;Jinde Cao</td>
-        <td>Dong He</td>
-        <td>10.1016/j.cag.2025.104181</td>
-        <td>GEAST-RF: Geometry Enhanced 3D Arbitrary Style...</td>
+        <td>Geontae Kim; Youngjin Cha</td>
+        <td>10.1016/j.autcon.2024.105878</td>
+        <td>2024-11-19T00:00:00.000Z</td>
+        <td>True</td>
+        <td>105878</td>
+        <td>NaN</td>
+        <td>S0926580524006149</td>
+        <td>2024-12-15</td>
+        <td>Automation in Construction</td>
+        <td>3D Pixelwise damage mapping using a deep atten...</td>
         <td>https://www.sciencedirect.com/science/article/...</td>
-        <td>2025-02-16T00:00:00.000Z</td>
-        <td>False</td>
-        <td>S0097849325000202</td>
-        <td>2025-02-16</td>
-        <td>None</td>
-        <td>Computers &amp; Graphics</td>
-        <td>104181</td>
-        <td>https://api.elsevier.com/content/article/pii/S...</td>
-        <td>None</td>
+        <td>Volume 168, Part B</td>
         </tr>
         <tr>
         <th>1</th>
-        <td>Qicheng Xu;Min Hu;Xitao Zhang</td>
-        <td>Qicheng Xu</td>
-        <td>10.1016/j.asr.2025.01.065</td>
-        <td>A neural radiance fields method for 3D reconst...</td>
-        <td>https://www.sciencedirect.com/science/article/...</td>
-        <td>2025-02-01T00:00:00.000Z</td>
+        <td>Akram Akbar; Chun Liu; Zeran Xu</td>
+        <td>10.1016/j.aei.2024.102913</td>
+        <td>2024-11-16T00:00:00.000Z</td>
         <td>False</td>
-        <td>S0273117725000973</td>
-        <td>2025-02-01</td>
-        <td>None</td>
-        <td>Advances in Space Research</td>
-        <td>None</td>
-        <td>https://api.elsevier.com/content/article/pii/S...</td>
-        <td>None</td>
+        <td>102913</td>
+        <td>NaN</td>
+        <td>S1474034624005640</td>
+        <td>2024-10-31</td>
+        <td>Advanced Engineering Informatics</td>
+        <td>Scene information guided aerial photogrammetri...</td>
+        <td>https://www.sciencedirect.com/science/article/...</td>
+        <td>Volume 62, Part D</td>
         </tr>
         <tr>
         <th>2</th>
-        <td>Jian Liu;Zhen Yu</td>
-        <td>Jian Liu</td>
-        <td>10.1016/j.neucom.2025.129420</td>
-        <td>SA3D-L: A lightweight model for 3D object segm...</td>
-        <td>https://www.sciencedirect.com/science/article/...</td>
-        <td>2025-01-14T00:00:00.000Z</td>
+        <td>Ruxandra Stoean; Nebojsa Bacanin; Leonard Ionescu</td>
+        <td>10.1016/j.culher.2024.07.008</td>
+        <td>2024-08-09T00:00:00.000Z</td>
         <td>False</td>
-        <td>S092523122500092X</td>
-        <td>2025-03-28</td>
-        <td>None</td>
-        <td>Neurocomputing</td>
-        <td>129420</td>
-        <td>https://api.elsevier.com/content/article/pii/S...</td>
-        <td>623</td>
+        <td>18</td>
+        <td>26.0</td>
+        <td>S1296207424001468</td>
+        <td>2024-10-31</td>
+        <td>Journal of Cultural Heritage</td>
+        <td>Bridging the past and present: AI-driven 3D re...</td>
+        <td>https://www.sciencedirect.com/science/article/...</td>
+        <td>Volume 69</td>
         </tr>
     </tbody>
     </table>
-    </div>
-
+    </div>
\ No newline at end of file
diff --git a/pybliometrics/sciencedirect/sciencedirect_search.py b/pybliometrics/sciencedirect/sciencedirect_search.py
index 75873e9b..bbeb18cd 100644
--- a/pybliometrics/sciencedirect/sciencedirect_search.py
+++ b/pybliometrics/sciencedirect/sciencedirect_search.py
@@ -3,20 +3,21 @@
 
 from pybliometrics.superclasses import Search
 from pybliometrics.utils import check_field_consistency, chained_get, \
-    check_integrity, check_parameter_value, deduplicate, \
+    check_integrity, check_parameter_value, deduplicate, make_int_if_possible, \
     make_search_summary, VIEWS
 
 
 class ScienceDirectSearch(Search):
     @property
-    def results(self) -> Optional[list[namedtuple]]:
-        """A list of namedtuples in the form `(authors first_author doi title link
-        load_date openaccess_status pii coverDate endingPage publicationName startingPage
-        api_link volume)`.
+    def results(self) -> Optional[list]:
+        """
+        A list of namedtuples in the form `(authors doi loadDate openAccess first_page last_page
+        pii publicationDate sourceTitle title uri volumeIssue)`.
 
-        Field definitions correspond to the `ScienceDirect Search Views
-        <https://dev.elsevier.com/sd_search_views.htmll>`__ and return the
-        values as-is, except for `authors` which are joined on `";"`.
+        Field definitions correspond to the `ScienceDirect Search API Migration Documentation
+        <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__ and return the
+        values as-is, except for `authors` which are joined on `"; "` and pages which are
+        parsed into `first_page` and `last_page`.
 
         Raises
         ------
@@ -29,49 +30,36 @@ def results(self) -> Optional[list[namedtuple]]:
         The list of authors and the list of affiliations per author are
         deduplicated.
         """
-        fields = 'authors first_author doi title link load_date openaccess_status pii '\
-            'coverDate endingPage publicationName startingPage api_link volume'
+        fields = 'authors doi loadDate openAccess first_page last_page pii publicationDate ' \
+                 'sourceTitle title uri volumeIssue'
         doc = namedtuple('Document', fields)
         check_field_consistency(self._integrity, fields)
         # Parse elements one-by-one
         out = []
         for item in self._json:
             # Get authors and create ";" separated string
-            authors_list = self._get_authors(item)
-            authors_list = deduplicate(authors_list)
-            authors = ';'.join(authors_list)
-            # Get links
-            links_found = item.get('link')
-            links = {'api_link': None, 'scidir': None}
-            for link in links_found:
-                if link.get('@ref') == 'self':
-                    links['api_link'] = link.get('@href')
-                elif link.get('@ref') == 'scidir':
-                    links['scidir'] = link.get('@href')
-            # Get doi
-            doi = item.get("prism:doi") or item.get("dc:identifier")[4:] if item.get("dc:identifier") else None
+            authors_list = deduplicate([a.get('name') for a in item.get('authors', {})])
+            authors = "; ".join(authors_list)
             new = doc(
                 authors=authors,
-                first_author=item.get('dc:creator'),
-                doi=doi,
-                title=item.get("dc:title"),
-                link=links["scidir"],
-                load_date=item.get("load-date"),
-                openaccess_status=item.get("openaccess"),
-                pii=item.get("pii"),
-                coverDate=item.get("prism:coverDate"),
-                endingPage=item.get("prism:endingPage"),
-                publicationName=item.get("prism:publicationName"),
-                startingPage=item.get("prism:startingPage"),
-                api_link=links["api_link"] or item.get("prism:url"),
-                volume=item.get("prism:volume")
+                doi=item.get('doi'),
+                loadDate=item.get('loadDate'),
+                openAccess=item.get('openAccess'),
+                first_page=make_int_if_possible(chained_get(item, ('pages', 'first'))),
+                last_page=make_int_if_possible(chained_get(item, ('pages', 'last'))),
+                pii=item.get('pii'),
+                publicationDate=item.get('publicationDate'),
+                sourceTitle=item.get('sourceTitle'),
+                title=item.get('title'),
+                uri=item.get('uri'),
+                volumeIssue=item.get('volumeIssue')
             )
             out.append(new)
         check_integrity(out, self._integrity, self._action)
         return out or None
 
     def __init__(self,
-                 query: str,
+                 query: dict,
                  refresh: Union[bool, int] = False,
                  view: Optional[str] = None,
                  verbose: bool = False,
@@ -79,40 +67,44 @@ def __init__(self,
                  integrity_fields: Optional[Union[list[str], tuple[str, ...]]] = None,
                  integrity_action: str = "raise",
                  subscriber: bool = True,
-                 **kwds: str
                  ) -> None:
-        """Interaction with the ScienceDirect Search API. This represents a search against the
-        ScienceDirect cluster, which contains serial/nonserial full-text articles. Note that this API
-        replicates the search experience on `ScienceDirect <www.sciencedirect.com>`__.
-
-        :param query: A string of the query as used in the `ScienceDirect Search <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__.
-        :param refresh: Whether to refresh the cached file if it exists or not.
-                        If int is passed, cached file will be refreshed if the
-                        number of days since last modification exceeds that value.
-        :param view: Which view to use for the query, see `the documentation <https://dev.elsevier.com/sd_search_views.html>`__.
-                     Allowed values: `STANDARD`.
-        :param verbose: Whether to print a download progress bar.
-        :param download: Whether to download results (if they have not been
-                         cached).
-        :param integrity_fields: A list or tuple with the names of fields whose completeness should
-                                 be checked.  `ArticleMetadata` will perform the
-                                 action specified in `integrity_action` if
-                                 elements in these fields are missing.  This
-                                 helps to avoid idiosynchratically missing
-                                 elements that should always be present
-                                 (e.g., doi or authors).
-        :param integrity_action: What to do in case integrity of provided fields
-                                 cannot be verified.  Possible actions:
-                                 - `"raise"`: Raise an `AttributeError`
-                                 - `"warn"`: Raise a `UserWarning`
-        :param subscriber: Whether you access ScienceDirect with a subscription or not.
-                           For subscribers, ScienceDirect's cursor navigation will be
-                           used.  Sets the number of entries in each query
-                           iteration to the maximum number allowed by the
-                           corresponding view.
-        :param kwds: Keywords passed on as query parameters.  Must contain
-                     fields and values mentioned in the `API specification <https://dev.elsevier.com/documentation/ArticleMetadataAPI.wadl>`__.
-
+        """
+        Interaction with the ScienceDirect Search API using the `PUT` method.
+        See the official `documentation <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__ 
+        for more details.
+
+        Parameters
+        ----------
+        query : dict
+            The query to be sent to the API, e.g.,
+            {'qs': '"Neural Networks" AND "Shapley"', 'date': '2019-2020'}
+
+        refresh : bool or int, optional
+            Whether to refresh the cached file. If an int is passed, the cache
+            will refresh if older than that many days.
+
+        view : str, optional
+            The API view to use. Default is "STANDARD".
+
+        verbose : bool, optional
+            Whether to print a download progress bar.
+
+        download : bool, optional
+            Whether to download results (if they haven't been cached).
+
+        integrity_fields : list of str or tuple of str, optional
+            Fields whose completeness should be checked. If any field is missing,
+            the `integrity_action` will be triggered.
+
+        integrity_action : {'raise', 'warn'}, optional
+            What to do if required fields are missing:
+            
+            - 'raise' : Raise an AttributeError
+            - 'warn' : Emit a UserWarning
+
+        subscriber : bool, optional
+            If True, cursor navigation is enabled, allowing more than 5,000 results.
+        
         Raises
         ------
         ScopusQueryError
@@ -122,16 +114,7 @@ def __init__(self,
             If any of the parameters `integrity_action`, `refresh` or `view`
             is not one of the allowed values.
 
-        Notes
-        -----
-        The directory for cached results is `{path}/{view}/{fname}`,
-        where `path` is specified in your configuration file and `fname` is
-        the md5-hashed version of `query`.
-
-        The ScienceDirect Search API V2 has two available interfaces: `PUT` and `GET`. This library uses the
-        `GET` interface.
         """
-        # Check view or set to default
         if view:
             check_parameter_value(view, VIEWS['ScienceDirectSearch'], "view")
         else:
@@ -140,29 +123,17 @@ def __init__(self,
         allowed = ("warn", "raise")
         check_parameter_value(integrity_action, allowed, "integrity_action")
 
-        # Query
         self._action = integrity_action
         self._integrity = integrity_fields or []
         self._refresh = refresh
         self._query = query
         self._view = view
-        Search.__init__(self, query=query, download=download, verbose=verbose, **kwds)
+
+        Search.__init__(self, query=query,
+                        cursor=subscriber, download=download,
+                        verbose=verbose)
 
     def __str__(self):
         """Print a summary string."""
-        return make_search_summary(self, "document", self.get_dois())
-
-    def get_dois(self):
-        """DOIs of retrieved documents."""
-        return [d.get("prism:doi") or d.get("dc:identifier")[4:] if d.get("dc:identifier") else None for d in self._json]
-
-    def _get_authors(self, item: dict) -> list:
-        """Auxiliary function to get the authors."""
-        authors_data = chained_get(item, ['authors', 'author'], [])
-        if isinstance(authors_data, list):
-            authors_list = [a.get('$') for a in authors_data]
-        elif isinstance(authors_data, str):
-            authors_list = [authors_data]
-        else:
-            authors_list = []
-        return authors_list
+        dois = [d.doi for d in self.results] if self.results else []
+        return make_search_summary(self, "document", dois)
diff --git a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
index b4589c59..acfd823d 100644
--- a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
+++ b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
@@ -1,13 +1,24 @@
 """Tests for sciencedirect.ScienceDirectSearch"""
 from collections import namedtuple
 
-from pybliometrics.exception import Scopus400Error
+import pytest
+
 from pybliometrics.sciencedirect import ScienceDirectSearch, init
 
 init()
 
-sds_standard = ScienceDirectSearch('TITLE("Assessing LLMs in malicious code deobfuscation of real-world malware campaigns") AND DATE(2012)', view="STANDARD", refresh=30)
-sds_empty = ScienceDirectSearch('TITLE("Not a very realistic title")', view="STANDARD", refresh=30)
+one_article_query = {'title': 'Assessing LLMs in malicious code deobfuscation of real-world malware campaigns',
+                     'date': '2024'}
+sds_standard = ScienceDirectSearch(one_article_query, refresh=30)
+
+empty_query = {'title': 'Not a realistic title', 'date': '2012'}
+sds_empty = ScienceDirectSearch(empty_query, view="STANDARD", refresh=30)
+
+huge_query = {'qs': 'Neural Networks', 'date': '2015-2020'}
+sds_huge = ScienceDirectSearch(huge_query, view="STANDARD", download=False, refresh=30)
+
+pagination_query = {'qs': '"Neural Networks" AND "Shapley"', 'date': '2020'}
+sds_pagination = ScienceDirectSearch(pagination_query, view="STANDARD", refresh=30)
 
 
 def test_empty_results():
@@ -16,64 +27,61 @@ def test_empty_results():
 
 
 def test_all_fields():
-    fields = 'authors first_author doi title link load_date openaccess_status pii '\
-        'coverDate endingPage publicationName startingPage api_link volume'
-    doc = namedtuple("Document", fields)
+    fields = 'authors doi loadDate openAccess first_page last_page pii publicationDate ' \
+             'sourceTitle title uri volumeIssue'
+    doc = namedtuple('Document', fields)
 
     expected_standard_doc = doc(
-        authors="Constantinos Patsakis;Fran Casino;Nikolaos Lykousas",
-        first_author="Constantinos Patsakis",
-        doi="10.1016/j.eswa.2024.124912",
-        title="Assessing LLMs in malicious code deobfuscation of real-world malware campaigns",
-        link="https://www.sciencedirect.com/science/article/pii/S0957417424017792?dgcid=api_sd_search-api-endpoint",
-        load_date="2024-07-31T00:00:00.000Z",
-        openaccess_status=True,
-        pii="S0957417424017792",
-        coverDate="2024-12-05",
-        endingPage=None,
-        publicationName="Expert Systems with Applications",
-        startingPage="124912",
-        api_link="https://api.elsevier.com/content/article/pii/S0957417424017792",
-        volume="256",
+        authors='Constantinos Patsakis; Fran Casino; Nikolaos Lykousas',
+        doi='10.1016/j.eswa.2024.124912',
+        loadDate="2024-07-31T00:00:00.000Z",
+        openAccess=True,
+        first_page=124912,
+        last_page=None,
+        pii='S0957417424017792',
+        publicationDate='2024-12-05',
+        sourceTitle='Expert Systems with Applications',
+        title='Assessing LLMs in malicious code deobfuscation of real-world malware campaigns',
+        uri='https://www.sciencedirect.com/science/article/pii/S0957417424017792?dgcid=api_sd_search-api-endpoint',
+        volumeIssue='Volume 256'
     )
+
     assert sds_standard.results[0] == expected_standard_doc
 
+    expected_last_document = doc(
+        authors='Elhadji Amadou Oury Diallo; Ayumi Sugiyama; Toshiharu Sugawara',
+        doi='10.1016/j.neucom.2018.08.094',
+        loadDate='2019-04-25T00:00:00.000Z',
+        openAccess=False,
+        first_page=230,
+        last_page=240,
+        pii='S0925231219304424',
+        publicationDate='2020-07-05',
+        sourceTitle='Neurocomputing',
+        title='Coordinated behavior of cooperative agents using deep reinforcement learning',
+        uri='https://www.sciencedirect.com/science/article/pii/S0925231219304424?dgcid=api_sd_search-api-endpoint',
+        volumeIssue='Volume 396'
+    )
+    assert sds_pagination.results[-1] == expected_last_document
+
+
 
 def test_field_consistency():
-    am_wrong_field = ScienceDirectSearch('TITLE("Assessing LLMs in malicious code deobfuscation of real-world malware campaigns") AND DATE(2012)',
+    am_wrong_field = ScienceDirectSearch(one_article_query,
                                  integrity_fields=["notExistingField"],
                                  integrity_action="warn",
                                  view="STANDARD",
                                  refresh=30)
-    try:
-        am_wrong_field.results
-    except ValueError:
-        pass
-    except Exception as e:
-        raise AssertionError(f"Unexpected exception type: {type(e).__name__}")
-    else:
-        raise AssertionError("Expected ValueError but no exception was raised")
+    with pytest.raises(ValueError):
+        _ = am_wrong_field.results
 
 
 def test_length():
     assert len(sds_standard.results) == sds_standard._n
     assert len(sds_standard.results) == sds_standard._n
-
+    assert sds_huge.get_results_size() > 156_000
+    assert len(sds_pagination.results) == 127
 
 def test_string():
-    str_start = ('Search \'TITLE("Assessing LLMs in malicious code deobfuscation of '
-    'real-world malware campaigns") AND DATE(2012)\' yielded 1 document as of')
-    assert sds_standard.__str__().startswith(str_start)
-
-
-def test_wrong_query():
-    try:
-        ScienceDirectSearch(
-            'Th(s querY - has M&ny ( Errors', view="STANDARD", refresh=30
-        )
-    except Scopus400Error:
-        pass
-    except Exception as e:
-        raise AssertionError(f"Unexpected exception type: {type(e).__name__}")
-    else:
-        raise AssertionError("Expected Scopus400Error but no exception was raised")
+    expected_str = "Search '{'title': 'Assessing LLMs in malicious code deobfuscation of real-world malware campaigns', 'date': '2024'}' yielded 1 document as of 2025-05-07:\n    10.1016/j.eswa.2024.124912"
+    assert str(sds_standard) == expected_str
diff --git a/pybliometrics/superclasses/base.py b/pybliometrics/superclasses/base.py
index b79c1b2f..eb543d09 100644
--- a/pybliometrics/superclasses/base.py
+++ b/pybliometrics/superclasses/base.py
@@ -54,10 +54,12 @@ def __init__(self,
         ab_ref_retrieval = (api == 'AbstractRetrieval') and (params['view'] == 'REF')
         # Check if object retrieval
         obj_retrieval = (api == 'ObjectRetrieval')
+        # Check if ScienceDirect Search API
+        sciencedirect_search = (api == 'ScienceDirectSearch')
 
         if fname.exists() and not self._refresh:
             self._mdate = mod_ts
-            if search_request:
+            if search_request or sciencedirect_search:
                 self._json = [loads(line) for line in
                               fname.read_text().split("\n") if line]
                 self._n = len(self._json)
@@ -66,61 +68,84 @@ def __init__(self,
             else:
                 self._json = loads(fname.read_text())
         else:
-            resp = get_content(url, api, params, **kwds)
-            header = resp.headers
-
-            if ab_ref_retrieval:
-                kwds['startref'] = '1'
-                data = _get_all_refs(url, params, verbose, resp, **kwds)
-                self._json = data
-                data = [data]
-            elif search_request:
-                # Get number of results
+            if sciencedirect_search:
+                resp = get_content(url, api, params, 'PUT' ,**kwds)
+                header = resp.headers
                 res = resp.json()
-                n = int(res['search-results'].get('opensearch:totalResults', 0) or 0)
+                # Get the number of results
+                n = int(res.get('resultsFound', 0))
                 self._n = n
-                # Results size check
-                cursor_exists = "cursor" in params
-                if not cursor_exists and n > SEARCH_MAX_ENTRIES:
-                    # Stop if there are too many results
-                    text = f'Found {n:,} matches.  The query fails to return '\
-                           f'more than {SEARCH_MAX_ENTRIES} entries.  Change '\
-                           'your query such that it returns fewer entries.'
-                    raise ScopusQueryError(text)
                 self._json = []
-                # Download results page-wise
                 if download:
-                    data = res.get('search-results', {}).get('entry', [])
-                    if not n:
-                        data = ""
-                    if not cursor_exists:
-                        start = params["start"]
-                    # Download the remaining information in chunks
+                    data = res.get('results', [])
+                    n_chunks = ceil(n/params["display"]["show"])
                     if verbose:
-                        print(f'Downloading results for query "{params["query"]}":')
-                    n_chunks = ceil(n/params['count'])
-                    for i in tqdm(range(1, n_chunks), disable=not verbose,
-                                  initial=1, total=n_chunks):
-                        if cursor_exists:
-                            cursor = res['search-results']['cursor']['@next']
-                            params.update({'cursor': cursor})
-                        else:
-                            start += params["count"]
-                            params.update({'start': start})
-                        resp = get_content(url, api, params, **kwds)
+                        print(f'Downloading results for query "{params}":')
+                    for i in tqdm(range(1, n_chunks), disable=not verbose):
+                        params['display']['offset'] += params["display"]["show"]
+                        resp = get_content(url, api, params, 'PUT' ,**kwds)
                         res = resp.json()
-                        data.extend(res.get('search-results', {}).get('entry', []))
+                        data.extend(res.get('results', []))
                     header = resp.headers  # Use header of final call
                     self._json = data
                 else:
                     data = None
-            elif obj_retrieval:
-                self._object = resp.content
-                data = []
             else:
-                data = loads(resp.text)
-                self._json = data
-                data = [data]
+                resp = get_content(url, api, params, **kwds)
+                header = resp.headers
+
+                if ab_ref_retrieval:
+                    kwds['startref'] = '1'
+                    data = _get_all_refs(url, params, verbose, resp, **kwds)
+                    self._json = data
+                    data = [data]
+                elif search_request:
+                    # Get number of results
+                    res = resp.json()
+                    n = int(res['search-results'].get('opensearch:totalResults', 0) or 0)
+                    self._n = n
+                    # Results size check
+                    cursor_exists = "cursor" in params
+                    if not cursor_exists and n > SEARCH_MAX_ENTRIES:
+                        # Stop if there are too many results
+                        text = f'Found {n:,} matches.  The query fails to return '\
+                            f'more than {SEARCH_MAX_ENTRIES} entries.  Change '\
+                            'your query such that it returns fewer entries.'
+                        raise ScopusQueryError(text)
+                    self._json = []
+                    # Download results page-wise
+                    if download:
+                        data = res.get('search-results', {}).get('entry', [])
+                        if not n:
+                            data = ""
+                        if not cursor_exists:
+                            start = params["start"]
+                        # Download the remaining information in chunks
+                        if verbose:
+                            print(f'Downloading results for query "{params["query"]}":')
+                        n_chunks = ceil(n/params['count'])
+                        for i in tqdm(range(1, n_chunks), disable=not verbose,
+                                    initial=1, total=n_chunks):
+                            if cursor_exists:
+                                cursor = res['search-results']['cursor']['@next']
+                                params.update({'cursor': cursor})
+                            else:
+                                start += params["count"]
+                                params.update({'start': start})
+                            resp = get_content(url, api, params, **kwds)
+                            res = resp.json()
+                            data.extend(res.get('search-results', {}).get('entry', []))
+                        header = resp.headers  # Use header of final call
+                        self._json = data
+                    else:
+                        data = None
+                elif obj_retrieval:
+                    self._object = resp.content
+                    data = []
+                else:
+                    data = loads(resp.text)
+                    self._json = data
+                    data = [data]
             # Set private variables
             self._mdate = time()
             self._header = header
diff --git a/pybliometrics/superclasses/search.py b/pybliometrics/superclasses/search.py
index 9723048b..be5966c6 100644
--- a/pybliometrics/superclasses/search.py
+++ b/pybliometrics/superclasses/search.py
@@ -5,7 +5,7 @@
 from typing import Union
 
 from pybliometrics.superclasses import Base
-from pybliometrics.utils import get_config, COUNTS, URLS
+from pybliometrics.utils import flatten_dict, get_config, COUNTS, URLS
 
 
 class Search(Base):
@@ -37,13 +37,27 @@ def __init__(self,
         api = self.__class__.__name__
         # Construct query parameters
         count = COUNTS[api][self._view]
-        params = {'count': count, 'view': self._view, **kwds}
-        if isinstance(query, dict):
-            params.update(query)
-            name = "&".join(["=".join(t) for t in zip(query.keys(), query.values())])
+
+        if api == 'ScienceDirectSearch':
+            # Add default parameters
+            params = {**query}
+            params.setdefault('display', {})
+            defaults = {'offset': 0, 'show': count, 'sortBy': 'date'}
+            for key, default in defaults.items():
+                params['display'].setdefault(key, default)
+            # Flatten query and create name
+            flat_query = flatten_dict(query)
+            name = "&".join(["=".join(map(str, t)) for t in zip(flat_query.keys(), flat_query.values())])
         else:
-            params['query'] = query
-            name = query
+            params = {'count': count, 'view': self._view, **kwds}
+
+            if isinstance(query, dict):
+                params.update(query)
+                name = "&".join(["=".join(t) for t in zip(query.keys(), query.values())])
+            else:
+                params['query'] = query
+                name = query
+
         if cursor:
             params.update({'cursor': '*'})
         else:
diff --git a/pybliometrics/utils/constants.py b/pybliometrics/utils/constants.py
index fb0fb85c..b0caeb49 100644
--- a/pybliometrics/utils/constants.py
+++ b/pybliometrics/utils/constants.py
@@ -133,3 +133,4 @@
 
 # Other API restrictions
 SEARCH_MAX_ENTRIES = 5_000
+SCIENCE_DIRECT_MAX_ENTRIES = 6_000
diff --git a/pybliometrics/utils/get_content.py b/pybliometrics/utils/get_content.py
index 801e746c..9e0c1006 100644
--- a/pybliometrics/utils/get_content.py
+++ b/pybliometrics/utils/get_content.py
@@ -1,7 +1,9 @@
-from typing import Type
+from typing import Literal, Optional, Type
+from random import shuffle
 from requests import Session
 from requests.adapters import HTTPAdapter
 from requests.exceptions import JSONDecodeError
+from time import sleep, time
 from urllib3.util import Retry
 
 from pybliometrics import __version__
@@ -31,62 +33,12 @@ def get_session() -> Type[Session]:
     return session
 
 
-def get_content(url, api, params=None, **kwds):
-    """Helper function to download a file and return its content.
-
-    Parameters
-    ----------
-    url : str
-        The URL to be parsed.
-
-    api : str
-        The Scopus API to be accessed.
-
-    params : dict (optional)
-        Dictionary containing query parameters.  For required keys
-        and accepted values see e.g.
-        https://api.elsevier.com/documentation/AuthorRetrievalAPI.wadl
-
-    **kwds : key-value parings, optional
-        Keywords passed on to as query parameters.  Must contain fields
-        and values specified in the respective API specification.
-
-    Raises
-    ------
-    ScopusHtmlError or HTTPError
-        If the status of the response is not ok.
-
-    ValueError
-        If the accept parameter is not one of the accepted values.
-
-    Returns
-    -------
-    resp : byte-like object
-        The content of the file, which needs to be serialized.
-    """
-    from random import shuffle
-    from time import sleep, time
-
-    # Get needed ressources for query
-    config = get_config()
-
+def prepare_headers_and_tokens(params):
+    """Prepare headers and tokens for the request."""
     keys = get_keys()
-
-    # Get tokens and zip with keys
-    insttokens = get_insttokens()
-    insttokens = list(zip(keys, insttokens))
-
-    # Keep keys that are not insttokens
+    insttokens = list(zip(keys, get_insttokens()))
     keys = keys[len(insttokens):]
 
-    session = get_session()
-
-    params = params or {}
-    params.update(**kwds)
-    proxies = dict(config._sections.get("Proxy", {}))
-    timeout = config.getint("Requests", "Timeout", fallback=20)
-
-    # Get keys/tokens and create header
     token_key, insttoken = None, None
     if "insttoken" in params:
         token_key = params.pop("apikey")
@@ -98,64 +50,109 @@ def get_content(url, api, params=None, **kwds):
     else:
         key = keys.pop(0)
 
-    header = {'Accept': 'application/json',
-              'User-Agent': user_agent,
-              'X-ELS-APIKey': token_key or key}
+    header = {
+        'Accept': 'application/json',
+        'User-Agent': user_agent,
+        'X-ELS-APIKey': token_key or key
+    }
+
+    if insttoken:
+        header['X-ELS-Insttoken'] = insttoken
+
+    return header, insttokens, keys
+
 
-    # Eventually wait bc of throttling
+def handle_throttling(api):
+    """Handle throttling based on API limits."""
     if len(_throttling_params[api]) == _throttling_params[api].maxlen:
         try:
             sleep(1 - (time() - _throttling_params[api][0]))
         except (IndexError, ValueError):
             pass
 
+
+def handle_response(resp):
+    """Handle the response and raise appropriate errors."""
+    try:
+        error_type = errors[resp.status_code]
+        try:
+            reason = resp.json()['service-error']['status']['statusText']
+        except KeyError:
+            try:
+                reason = resp.json()['message']
+            except KeyError:
+                try:
+                    reason = resp.json()['error-response']['error-message']
+                except KeyError:
+                    reason = ""
+        raise error_type(reason)
+    except (JSONDecodeError, KeyError):
+        resp.raise_for_status()
+
+
+def get_content(url: str,
+                api: str,
+                params: Optional[dict],
+                method: Literal['GET', 'PUT'] = 'GET',
+                **kwds):
+    """Helper function to download a file and return its content."""
+    config = get_config()
+
+    session = get_session()
+
+    params = params or {}
+    params.update(**kwds)
+    proxies = dict(config._sections.get("Proxy", {}))
+    timeout = config.getint("Requests", "Timeout", fallback=20)
+
+    header, insttokens, keys = prepare_headers_and_tokens(params)
+    handle_throttling(api)
+
     # Use insttoken if available
-    if insttoken:
-        header['X-ELS-Insttoken'] = insttoken
-        resp = session.get(url, headers=header, params=params, timeout=timeout)
+    if 'X-ELS-Insttoken' in header:
+        if method == 'GET':
+            resp = session.get(url, headers=header, params=params, timeout=timeout)
+        else:
+            resp = session.put(url, headers=header, json=params, timeout=timeout)
     else:
-        resp = session.get(url, headers=header, params=params, timeout=timeout, proxies=proxies)
+        if method == 'GET':
+            resp = session.get(url, headers=header, params=params, timeout=timeout, proxies=proxies)
+        else:
+            resp = session.put(url, headers=header, json=params, timeout=timeout, proxies=proxies)
+
 
-    # If 429 try other tokens
-    while (resp.status_code == 429) or (resp.status_code == 401):
+    # Retry logic for 429 or 401
+    while resp.status_code in (429, 401):
         try:
             token_key, token = insttokens.pop(0) # Get and remove current key
             header['X-ELS-APIKey'] = token_key
             header['X-ELS-Insttoken'] = token
             shuffle(insttokens)
-            resp = session.get(url, headers=header, params=params, timeout=timeout)
+            if method == 'GET':
+                resp = session.get(url, headers=header, params=params, timeout=timeout)
+            else:
+                resp = session.put(url, headers=header, json=params, timeout=timeout)
         except IndexError:  # All tokens depleted
             break
 
-   # Remove Insttoken from header (if present)
-    if 'X-ELS-Insttoken' in header:
-        del header['X-ELS-Insttoken']
-
-    # If 429 try other keys
-    while (resp.status_code == 429) or (resp.status_code == 401):
+    while resp.status_code in (429, 401):
         try:
             key = keys.pop(0)  # Remove current key
             header['X-ELS-APIKey'] = key
             shuffle(keys)
-            resp = session.get(url, headers=header, proxies=proxies, params=params, timeout=timeout)
+            if method == 'GET':
+                resp = session.get(url, headers=header, proxies=proxies, params=params, timeout=timeout)
+            else:
+                resp = session.put(url, headers=header, json=params, timeout=timeout, proxies=proxies)
         except IndexError:  # All keys depleted
             break
 
+    if 'X-ELS-Insttoken' in header:
+        del header['X-ELS-Insttoken']
+
     _throttling_params[api].append(time())
 
-    # Eventually raise error, if possible with supplied error message
-    try:
-        error_type = errors[resp.status_code]
-        try:
-            reason = resp.json()['service-error']['status']['statusText']
-        except KeyError:
-            try:
-                reason = resp.json()['message']
-            except:
-                reason = ""
-        raise error_type(reason)
-    except (JSONDecodeError, KeyError):
-        resp.raise_for_status()
+    handle_response(resp)
     return resp
 
 
diff --git a/pybliometrics/utils/parse_content.py b/pybliometrics/utils/parse_content.py
index 277a0e14..e41e05b7 100644
--- a/pybliometrics/utils/parse_content.py
+++ b/pybliometrics/utils/parse_content.py
@@ -65,6 +65,18 @@ def deduplicate(lst):
     return new
 
 
+def flatten_dict(d, parent_key='', sep='.'):
+    """Recursively flatten a nested dictionary."""
+    items = []
+    for k, v in d.items():
+        new_key = f"{parent_key}{sep}{k}" if parent_key else k
+        if isinstance(v, dict):
+            items.extend(flatten_dict(v, new_key, sep=sep).items())
+        else:
+            items.append((new_key, v))
+    return dict(items)
+
+
 def get_id(s, integer=True):
     """Helper function to return the Scopus ID at a fixed position."""
     path = ['coredata', 'dc:identifier']
@@ -126,7 +138,7 @@ def make_int_if_possible(val):
     """Attempt a conversion to int type."""
     try:
         return int(val)
-    except TypeError:
+    except (TypeError, ValueError):
         return val
 
 

From 22882f12679b756c7e2d02cb641e5677ad06a641 Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Wed, 7 May 2025 18:42:30 +0200
Subject: [PATCH 2/9] Raise error if results exceed limit

---
 .../sciencedirect/ScienceDirectSearch.rst       |  8 ++++----
 .../sciencedirect/sciencedirect_search.py       | 14 ++++++++------
 .../tests/test_ScienceDirectSearch.py           | 17 +++++++++++------
 pybliometrics/superclasses/base.py              |  7 ++++++-
 4 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/docs/reference/sciencedirect/ScienceDirectSearch.rst b/docs/reference/sciencedirect/ScienceDirectSearch.rst
index bc2d77ce..688828cb 100644
--- a/docs/reference/sciencedirect/ScienceDirectSearch.rst
+++ b/docs/reference/sciencedirect/ScienceDirectSearch.rst
@@ -1,8 +1,8 @@
 pybliometrics.sciencedirect.ScienceDirectSearch
 ===============================================
 
-`ScienceDirectSearch()` implements the `ScienceDirect Search API <https://nonprod-devportal.elsevier.com/documentation/ScienceDirectSearchAPI.wadl>`_ using the `PUT method`.  It executes a query to search for documents and retrieves the resulting records.
-The class takes a `query`` dictionary as input which has to follow this schema:
+`ScienceDirectSearch()` implements the `ScienceDirect Search API <https://nonprod-devportal.elsevier.com/documentation/ScienceDirectSearchAPI.wadl>`_ using the `PUT` method.  It executes a query to search for documents and retrieves the resulting records.
+The class takes a `query` dictionary as input which has to follow this schema:
 
 .. code-block:: text
 
@@ -44,7 +44,7 @@ Examples
 --------
 
 The class is initialized with a search query.
-We can pass the field `qs` to search for a specific keywords.
+We can pass the field `qs`` to search for specific keywords.
 Using `verbose=True` will print the progress of the download.
 
 .. code-block:: python
@@ -78,7 +78,7 @@ To access the results, use the attribute `results` which contains a list of `Doc
      Document(authors='Ruxandra Stoean; Nebojsa Bacanin; Leonard Ionescu', doi='10.1016/j.culher.2024.07.008', loadDate='2024-08-09T00:00:00.000Z', openAccess=False, first_page=18, last_page=26, pii='S1296207424001468', publicationDate='2024-10-31', sourceTitle='Journal of Cultural Heritage', title='Bridging the past and present: AI-driven 3D restoration of degraded artefacts for museum digital display', uri='https://www.sciencedirect.com/science/article/pii/S1296207424001468?dgcid=api_sd_search-api-endpoint', volumeIssue='Volume 69'),
      ...]
 
-The list of results can be cast into a Pandas DataFrame.
+The list of results can be converted into a Pandas DataFrame.
 
 .. code-block:: python
 
diff --git a/pybliometrics/sciencedirect/sciencedirect_search.py b/pybliometrics/sciencedirect/sciencedirect_search.py
index bbeb18cd..c80b07f2 100644
--- a/pybliometrics/sciencedirect/sciencedirect_search.py
+++ b/pybliometrics/sciencedirect/sciencedirect_search.py
@@ -1,3 +1,4 @@
+"""ScienceDirectSearch class for searching documents in ScienceDirect."""
 from collections import namedtuple
 from typing import Optional, Union
 
@@ -8,11 +9,16 @@
 
 
 class ScienceDirectSearch(Search):
+    """
+    Interaction with the ScienceDirect Search API using the `PUT` method.
+    See the official `documentation <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__ 
+    for more details.
+    """
     @property
     def results(self) -> Optional[list]:
         """
-        A list of namedtuples in the form `(authors doi loadDate openAccess first_page last_page
-        pii publicationDate sourceTitle title uri volumeIssue)`.
+        A list of namedtuples in the form `(authors, doi, loadDate, openAccess, first_page, last_page
+        pii, publicationDate, sourceTitle, title, uri, volumeIssue)`.
 
         Field definitions correspond to the `ScienceDirect Search API Migration Documentation
         <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__ and return the
@@ -69,10 +75,6 @@ def __init__(self,
                  subscriber: bool = True,
                  ) -> None:
         """
-        Interaction with the ScienceDirect Search API using the `PUT` method.
-        See the official `documentation <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__ 
-        for more details.
-
         Parameters
         ----------
         query : dict
diff --git a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
index acfd823d..79fc5e87 100644
--- a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
+++ b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+from pybliometrics.exception import ScopusQueryError
 from pybliometrics.sciencedirect import ScienceDirectSearch, init
 
 init()
@@ -20,12 +21,6 @@
 pagination_query = {'qs': '"Neural Networks" AND "Shapley"', 'date': '2020'}
 sds_pagination = ScienceDirectSearch(pagination_query, view="STANDARD", refresh=30)
 
-
-def test_empty_results():
-    assert sds_empty.results is None
-    assert sds_empty._n == 0
-
-
 def test_all_fields():
     fields = 'authors doi loadDate openAccess first_page last_page pii publicationDate ' \
              'sourceTitle title uri volumeIssue'
@@ -65,6 +60,10 @@ def test_all_fields():
     assert sds_pagination.results[-1] == expected_last_document
 
 
+def test_empty_results():
+    assert sds_empty.results is None
+    assert sds_empty._n == 0
+
 
 def test_field_consistency():
     am_wrong_field = ScienceDirectSearch(one_article_query,
@@ -76,12 +75,18 @@ def test_field_consistency():
         _ = am_wrong_field.results
 
 
+def test_large_results():
+    with pytest.raises(ScopusQueryError):
+        _ = ScienceDirectSearch(huge_query, view="STANDARD", download=True, refresh=30)
+
+
 def test_length():
     assert len(sds_standard.results) == sds_standard._n
     assert len(sds_standard.results) == sds_standard._n
     assert sds_huge.get_results_size() > 156_000
     assert len(sds_pagination.results) == 127
 
+
 def test_string():
     expected_str = "Search '{'title': 'Assessing LLMs in malicious code deobfuscation of real-world malware campaigns', 'date': '2024'}' yielded 1 document as of 2025-05-07:\n    10.1016/j.eswa.2024.124912"
     assert str(sds_standard) == expected_str
diff --git a/pybliometrics/superclasses/base.py b/pybliometrics/superclasses/base.py
index eb543d09..d5cb4285 100644
--- a/pybliometrics/superclasses/base.py
+++ b/pybliometrics/superclasses/base.py
@@ -8,7 +8,7 @@
 from tqdm import tqdm
 
 from pybliometrics.exception import ScopusQueryError
-from pybliometrics.utils import get_content, parse_content, SEARCH_MAX_ENTRIES
+from pybliometrics.utils import get_content, parse_content, SCIENCE_DIRECT_MAX_ENTRIES, SEARCH_MAX_ENTRIES
 from pybliometrics.utils import listify
 
 
@@ -77,6 +77,11 @@ def __init__(self,
                 self._n = n
                 self._json = []
                 if download:
+                    if n > SCIENCE_DIRECT_MAX_ENTRIES:
+                        text = f'Found {n:,} matches.  The query fails to return '\
+                            f'more than {SCIENCE_DIRECT_MAX_ENTRIES} entries. Please '\
+                            'refine your query.'
+                        raise ScopusQueryError(text)
                     data = res.get('results', [])
                     n_chunks = ceil(n/params["display"]["show"])
                     if verbose:

From 0803af92b19fdaf8ef38b6cefaf3f724c7841373 Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Sun, 18 May 2025 19:49:56 +0200
Subject: [PATCH 3/9] Requested changes

---
 .../sciencedirect/ScienceDirectSearch.rst     |  9 ++---
 .../sciencedirect/sciencedirect_search.py     | 23 ++++++++---
 .../tests/test_ScienceDirectSearch.py         | 38 ++++++++++---------
 pybliometrics/superclasses/search.py          | 11 +++---
 4 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/docs/reference/sciencedirect/ScienceDirectSearch.rst b/docs/reference/sciencedirect/ScienceDirectSearch.rst
index 688828cb..6d001680 100644
--- a/docs/reference/sciencedirect/ScienceDirectSearch.rst
+++ b/docs/reference/sciencedirect/ScienceDirectSearch.rst
@@ -2,7 +2,7 @@ pybliometrics.sciencedirect.ScienceDirectSearch
 ===============================================
 
 `ScienceDirectSearch()` implements the `ScienceDirect Search API <https://nonprod-devportal.elsevier.com/documentation/ScienceDirectSearchAPI.wadl>`_ using the `PUT` method.  It executes a query to search for documents and retrieves the resulting records.
-The class takes a `query` dictionary as input which has to follow this schema:
+The class takes a `query` string that searches through all the article's or chapter's content. You can also pass any of the following parameters as keyword arguments:
 
 .. code-block:: text
 
@@ -44,16 +44,15 @@ Examples
 --------
 
 The class is initialized with a search query.
-We can pass the field `qs`` to search for specific keywords.
+We can pass `date` as keyword argument to search for documents published in a specific date.
 Using `verbose=True` will print the progress of the download.
 
 .. code-block:: python
 
     >>> from pybliometrics.sciencedirect import ScienceDirectSearch, init
     >>> init()
-    >>> # Retrieve documents based on the search query  
-    >>> query = query = {'qs': '"neural radiance fields" AND "3D rendering"', 'date': '2024'}
-    >>> sds = ScienceDirectSearch(query, verbose=True)
+    >>> # Retrieve documents based on the search query and date
+    >>> sds = ScienceDirectSearch('"neural radiance fields" AND "3D rendering"', date='2024', verbose=True)
     Downloading results for query "{'qs': '"neural radiance fields" AND "3D rendering"', 'date': '2024', 'display': {'offset': 0, 'show': 100, 'sortBy': 'date'}, 'cursor': '*'}":
     100%|██████████| 1/1 [00:00<00:00,  3.23it/s]
 
diff --git a/pybliometrics/sciencedirect/sciencedirect_search.py b/pybliometrics/sciencedirect/sciencedirect_search.py
index c80b07f2..7a02f739 100644
--- a/pybliometrics/sciencedirect/sciencedirect_search.py
+++ b/pybliometrics/sciencedirect/sciencedirect_search.py
@@ -65,7 +65,7 @@ def results(self) -> Optional[list]:
         return out or None
 
     def __init__(self,
-                 query: dict,
+                 query: str = '',
                  refresh: Union[bool, int] = False,
                  view: Optional[str] = None,
                  verbose: bool = False,
@@ -73,13 +73,13 @@ def __init__(self,
                  integrity_fields: Optional[Union[list[str], tuple[str, ...]]] = None,
                  integrity_action: str = "raise",
                  subscriber: bool = True,
+                 **kwds: str
                  ) -> None:
         """
         Parameters
         ----------
-        query : dict
-            The query to be sent to the API, e.g.,
-            {'qs': '"Neural Networks" AND "Shapley"', 'date': '2019-2020'}
+        query : str
+            The query to be sent to the API, e.g. '"Neural Networks" AND "Shapley"'
 
         refresh : bool or int, optional
             Whether to refresh the cached file. If an int is passed, the cache
@@ -107,6 +107,12 @@ def __init__(self,
         subscriber : bool, optional
             If True, cursor navigation is enabled, allowing more than 5,000 results.
         
+        **kwds: str
+            Additional keyword arguments to be passed to the API. These can be any available
+            search fields, such as `authors`, `pub-date` and `title`. For a full list of
+            available fields, see the `ScienceDirect Search API Migration Documentation
+            <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__.
+        
         Raises
         ------
         ScopusQueryError
@@ -117,8 +123,13 @@ def __init__(self,
             is not one of the allowed values.
 
         """
+        # Check if the query and keyword arguments are empty
+        if not (query or kwds):
+            msg = "The query is empty. Please provide either a query string or keyword arguments."
+            raise ValueError(msg)
+
         if view:
-            check_parameter_value(view, VIEWS['ScienceDirectSearch'], "view")
+            check_parameter_value(view, VIEWS["ScienceDirectSearch"], "view")
         else:
             view = "STANDARD"
 
@@ -133,7 +144,7 @@ def __init__(self,
 
         Search.__init__(self, query=query,
                         cursor=subscriber, download=download,
-                        verbose=verbose)
+                        verbose=verbose, **kwds)
 
     def __str__(self):
         """Print a summary string."""
diff --git a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
index 79fc5e87..492fd6fb 100644
--- a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
+++ b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
@@ -8,18 +8,21 @@
 
 init()
 
-one_article_query = {'title': 'Assessing LLMs in malicious code deobfuscation of real-world malware campaigns',
-                     'date': '2024'}
-sds_standard = ScienceDirectSearch(one_article_query, refresh=30)
+sds_standard = ScienceDirectSearch(title='Assessing LLMs in malicious code deobfuscation of real-world malware campaigns',
+                                   date='2024',
+                                   refresh=30)
 
-empty_query = {'title': 'Not a realistic title', 'date': '2012'}
-sds_empty = ScienceDirectSearch(empty_query, view="STANDARD", refresh=30)
+sds_empty = ScienceDirectSearch(title='Not a realistic title',
+                                date='2012',
+                                view="STANDARD", refresh=30)
 
-huge_query = {'qs': 'Neural Networks', 'date': '2015-2020'}
-sds_huge = ScienceDirectSearch(huge_query, view="STANDARD", download=False, refresh=30)
+sds_huge = ScienceDirectSearch('Neural Networks',
+                               date='2015-2020',
+                               view="STANDARD", download=False, refresh=30)
 
-pagination_query = {'qs': '"Neural Networks" AND "Shapley"', 'date': '2020'}
-sds_pagination = ScienceDirectSearch(pagination_query, view="STANDARD", refresh=30)
+sds_pagination = ScienceDirectSearch('"Neural Networks" AND "Shapley"',
+                                     date='2020',
+                                     view="STANDARD", refresh=30)
 
 def test_all_fields():
     fields = 'authors doi loadDate openAccess first_page last_page pii publicationDate ' \
@@ -66,18 +69,19 @@ def test_empty_results():
 
 
 def test_field_consistency():
-    am_wrong_field = ScienceDirectSearch(one_article_query,
-                                 integrity_fields=["notExistingField"],
-                                 integrity_action="warn",
-                                 view="STANDARD",
-                                 refresh=30)
+    am_wrong_field = ScienceDirectSearch(query='',
+                                   title='Assessing LLMs in malicious code deobfuscation of real-world malware campaigns',
+                                   date='2024',
+                                   integrity_fields=["notExistingField"],
+                                   integrity_action="warn",
+                                   view="STANDARD", refresh=30)
     with pytest.raises(ValueError):
         _ = am_wrong_field.results
 
 
 def test_large_results():
     with pytest.raises(ScopusQueryError):
-        _ = ScienceDirectSearch(huge_query, view="STANDARD", download=True, refresh=30)
+        _ = ScienceDirectSearch('Neural Networks', view="STANDARD", download=True, refresh=30)
 
 
 def test_length():
@@ -88,5 +92,5 @@ def test_length():
 
 
 def test_string():
-    expected_str = "Search '{'title': 'Assessing LLMs in malicious code deobfuscation of real-world malware campaigns', 'date': '2024'}' yielded 1 document as of 2025-05-07:\n    10.1016/j.eswa.2024.124912"
-    assert str(sds_standard) == expected_str
+    expected_str = "Search '' yielded 1 document as of"
+    assert str(sds_standard).startswith(expected_str)
diff --git a/pybliometrics/superclasses/search.py b/pybliometrics/superclasses/search.py
index be5966c6..8234225a 100644
--- a/pybliometrics/superclasses/search.py
+++ b/pybliometrics/superclasses/search.py
@@ -39,15 +39,16 @@ def __init__(self,
         count = COUNTS[api][self._view]
 
         if api == 'ScienceDirectSearch':
-            # Add default parameters
-            params = {**query}
+            # Set qs, keyword arguments and add default parameters
+            params = {'qs': query, **kwds}
+            # Flatten query and create name
+            flat_query = flatten_dict(params)
+            name = "&".join(["=".join(map(str, t)) for t in zip(flat_query.keys(), flat_query.values())])
+            # Add default parameters for pagination
             params.setdefault('display', {})
             defaults = {'offset': 0, 'show': count, 'sortBy': 'date'}
             for key, default in defaults.items():
                 params['display'].setdefault(key, default)
-            # Flatten query and create name
-            flat_query = flatten_dict(query)
-            name = "&".join(["=".join(map(str, t)) for t in zip(flat_query.keys(), flat_query.values())])
         else:
             params = {'count': count, 'view': self._view, **kwds}
 

From 98b050459378f7091b045c3662b3aa169a540b08 Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Sun, 18 May 2025 20:00:47 +0200
Subject: [PATCH 4/9] Let query be optional and set '' as default

---
 pybliometrics/sciencedirect/sciencedirect_search.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pybliometrics/sciencedirect/sciencedirect_search.py b/pybliometrics/sciencedirect/sciencedirect_search.py
index 7a02f739..fa36bb25 100644
--- a/pybliometrics/sciencedirect/sciencedirect_search.py
+++ b/pybliometrics/sciencedirect/sciencedirect_search.py
@@ -65,7 +65,7 @@ def results(self) -> Optional[list]:
         return out or None
 
     def __init__(self,
-                 query: str = '',
+                 query: Optional[str] = None,
                  refresh: Union[bool, int] = False,
                  view: Optional[str] = None,
                  verbose: bool = False,
@@ -127,6 +127,7 @@ def __init__(self,
         if not (query or kwds):
             msg = "The query is empty. Please provide either a query string or keyword arguments."
             raise ValueError(msg)
+        query = query or ''
 
         if view:
             check_parameter_value(view, VIEWS["ScienceDirectSearch"], "view")

From a5e69f8260b060bbe86faae685420afcddf426aa Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Sun, 18 May 2025 20:17:45 +0200
Subject: [PATCH 5/9] ScienceDirectSearch: Test empty query

---
 .../sciencedirect/tests/test_ScienceDirectSearch.py          | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
index 492fd6fb..3edb007f 100644
--- a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
+++ b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
@@ -68,6 +68,11 @@ def test_empty_results():
     assert sds_empty._n == 0
 
 
+def test_empty_query():
+    with pytest.raises(ValueError):
+        _ = ScienceDirectSearch(view="STANDARD", refresh=30)
+
+
 def test_field_consistency():
     am_wrong_field = ScienceDirectSearch(query='',
                                    title='Assessing LLMs in malicious code deobfuscation of real-world malware campaigns',

From 4e069054f2d85652d5e6242dcda20055cc819968 Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Fri, 13 Jun 2025 16:00:16 +0200
Subject: [PATCH 6/9] Make minimal changes

---
 pybliometrics/utils/get_content.py | 152 +++++++++++++++++------------
 1 file changed, 87 insertions(+), 65 deletions(-)

diff --git a/pybliometrics/utils/get_content.py b/pybliometrics/utils/get_content.py
index 9e0c1006..50040f35 100644
--- a/pybliometrics/utils/get_content.py
+++ b/pybliometrics/utils/get_content.py
@@ -1,9 +1,7 @@
 from typing import Literal, Optional, Type
-from random import shuffle
 from requests import Session
 from requests.adapters import HTTPAdapter
 from requests.exceptions import JSONDecodeError
-from time import sleep, time
 from urllib3.util import Retry
 
 from pybliometrics import __version__
@@ -33,12 +31,66 @@ def get_session() -> Type[Session]:
     return session
 
 
-def prepare_headers_and_tokens(params):
-    """Prepare headers and tokens for the request."""
+def get_content(url: str,
+                api: str,
+                params: Optional[dict],
+                method: Literal['GET', 'PUT'] = 'GET',
+                **kwds):
+    """Helper function to download a file and return its content.
+
+    Parameters
+    ----------
+    url : str
+        The URL to be parsed.
+
+    api : str
+        The Scopus API to be accessed.
+
+    params : dict (optional)
+        Dictionary containing query parameters.  For required keys
+        and accepted values see e.g.
+        https://api.elsevier.com/documentation/AuthorRetrievalAPI.wadl
+
+    **kwds : key-value pairings, optional
+        Keywords passed on as query parameters.  Must contain fields
+        and values specified in the respective API specification.
+
+    Raises
+    ------
+    ScopusHtmlError or HTTPError
+        If the status of the response is not ok.
+
+    ValueError
+        If the accept parameter is not one of the accepted values.
+
+    Returns
+    -------
+    resp : byte-like object
+        The content of the file, which needs to be serialized.
+    """
+    from random import shuffle
+    from time import sleep, time
+
+    # Get needed resources for query
+    config = get_config()
+
     keys = get_keys()
-    insttokens = list(zip(keys, get_insttokens()))
+
+    # Get tokens and zip with keys
+    insttokens = get_insttokens()
+    insttokens = list(zip(keys, insttokens))
+
+    # Keep keys that are not insttokens
     keys = keys[len(insttokens):]
 
+    session = get_session()
+
+    params = params or {}
+    params.update(**kwds)
+    proxies = dict(config._sections.get("Proxy", {}))
+    timeout = config.getint("Requests", "Timeout", fallback=20)
+
+    # Get keys/tokens and create header
     token_key, insttoken = None, None
     if "insttoken" in params:
         token_key = params.pop("apikey")
@@ -50,66 +102,20 @@ def prepare_headers_and_tokens(params):
     else:
         key = keys.pop(0)
 
-    header = {
-        'Accept': 'application/json',
-        'User-Agent': user_agent,
-        'X-ELS-APIKey': token_key or key
-    }
+    header = {'Accept': 'application/json',
+              'User-Agent': user_agent,
+              'X-ELS-APIKey': token_key or key}
 
-    if insttoken:
-        header['X-ELS-Insttoken'] = insttoken
-
-    return header, insttokens, keys
-
-
-def handle_throttling(api):
-    """Handle throttling based on API limits."""
+    # Wait if necessary because of throttling
     if len(_throttling_params[api]) == _throttling_params[api].maxlen:
         try:
             sleep(1 - (time() - _throttling_params[api][0]))
         except (IndexError, ValueError):
             pass
 
-
-def handle_response(resp):
-    """Handle the response and raise appropriate errors."""
-    try:
-        error_type = errors[resp.status_code]
-        try:
-            reason = resp.json()['service-error']['status']['statusText']
-        except KeyError:
-            try:
-                reason = resp.json()['message']
-            except KeyError:
-                try:
-                    reason = resp.json()['error-response']['error-message']
-                except KeyError:
-                    reason = ""
-        raise error_type(reason)
-    except (JSONDecodeError, KeyError):
-        resp.raise_for_status()
-
-
-def get_content(url: str,
-                api: str,
-                params: Optional[dict],
-                method: Literal['GET', 'PUT'] = 'GET',
-                **kwds):
-    """Helper function to download a file and return its content."""
-    config = get_config()
-
-    session = get_session()
-
-    params = params or {}
-    params.update(**kwds)
-    proxies = dict(config._sections.get("Proxy", {}))
-    timeout = config.getint("Requests", "Timeout", fallback=20)
-
-    header, insttokens, keys = prepare_headers_and_tokens(params)
-    handle_throttling(api)
-
     # Use insttoken if available
-    if 'X-ELS-Insttoken' in header:
+    if insttoken:
+        header['X-ELS-Insttoken'] = insttoken
         if method == 'GET':
             resp = session.get(url, headers=header, params=params, timeout=timeout)
         else:
@@ -120,9 +126,8 @@ def get_content(url: str,
         else:
             resp = session.put(url, headers=header, json=params, timeout=timeout, proxies=proxies)
 
-
-    # Retry logic for 429 or 401
-    while resp.status_code in (429, 401):
+    # If 429 or 401, try other tokens
+    while (resp.status_code == 429) or (resp.status_code == 401):
         try:
             token_key, token = insttokens.pop(0) # Get and remove current key
             header['X-ELS-APIKey'] = token_key
@@ -135,7 +140,12 @@ def get_content(url: str,
         except IndexError:  # All tokens depleted
             break
 
-    while resp.status_code in (429, 401):
+    # Remove Insttoken from header (if present)
+    if 'X-ELS-Insttoken' in header:
+        del header['X-ELS-Insttoken']
+
+    # If 429 or 401, try other keys
+    while (resp.status_code == 429) or (resp.status_code == 401):
         try:
             key = keys.pop(0)  # Remove current key
             header['X-ELS-APIKey'] = key
@@ -147,12 +157,24 @@ def get_content(url: str,
         except IndexError:  # All keys depleted
             break
 
-    if 'X-ELS-Insttoken' in header:
-        del header['X-ELS-Insttoken']
-
     _throttling_params[api].append(time())
 
-    handle_response(resp)
+    # Raise an error if applicable, if possible with supplied error message
+    try:
+        error_type = errors[resp.status_code]
+        try:
+            reason = resp.json()['service-error']['status']['statusText']
+        except KeyError:
+            try:
+                reason = resp.json()['message']
+            except KeyError:
+                try:
+                    reason = resp.json()['error-response']['error-message']
+                except KeyError:
+                    reason = ""
+        raise error_type(reason)
+    except (JSONDecodeError, KeyError):
+        resp.raise_for_status()
     return resp
 
 

From 4c5e2133709d4464c2ef5522a8f2309aad22c3e0 Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Fri, 13 Jun 2025 16:16:42 +0200
Subject: [PATCH 7/9] Requested changes: ScienceDirectSearch

---
 .../sciencedirect/sciencedirect_search.py     | 79 +++++++++----------
 1 file changed, 38 insertions(+), 41 deletions(-)

diff --git a/pybliometrics/sciencedirect/sciencedirect_search.py b/pybliometrics/sciencedirect/sciencedirect_search.py
index fa36bb25..11b72b22 100644
--- a/pybliometrics/sciencedirect/sciencedirect_search.py
+++ b/pybliometrics/sciencedirect/sciencedirect_search.py
@@ -9,11 +9,6 @@
 
 
 class ScienceDirectSearch(Search):
-    """
-    Interaction with the ScienceDirect Search API using the `PUT` method.
-    See the official `documentation <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__ 
-    for more details.
-    """
     @property
     def results(self) -> Optional[list]:
         """
@@ -76,42 +71,38 @@ def __init__(self,
                  **kwds: str
                  ) -> None:
         """
-        Parameters
-        ----------
-        query : str
-            The query to be sent to the API, e.g. '"Neural Networks" AND "Shapley"'
-
-        refresh : bool or int, optional
-            Whether to refresh the cached file. If an int is passed, the cache
-            will refresh if older than that many days.
-
-        view : str, optional
-            The API view to use. Default is "STANDARD".
-
-        verbose : bool, optional
-            Whether to print a download progress bar.
-
-        download : bool, optional
-            Whether to download results (if they haven't been cached).
-
-        integrity_fields : list of str or tuple of str, optional
-            Fields whose completeness should be checked. If any field is missing,
-            the `integrity_action` will be triggered.
-
-        integrity_action : {'raise', 'warn'}, optional
-            What to do if required fields are missing:
-            
-            - 'raise' : Raise an AttributeError
-            - 'warn' : Emit a UserWarning
-
-        subscriber : bool, optional
-            If True, cursor navigation is enabled, allowing more than 5,000 results.
-        
-        **kwds: str
-            Additional keyword arguments to be passed to the API. These can be any available
-            search fields, such as `authors`, `pub-date` and `title`. For a full list of
-            available fields, see the `ScienceDirect Search API Migration Documentation
-            <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__.
+        Interaction with the ScienceDirect Search API using the `PUT` method.
+        See the official `documentation <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__ 
+        for more details.
+
+        :param query: Free text query string as the `qs` field in the `documentation
+                      <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__.
+        :param refresh: Whether to refresh the cached file if it exists or not.
+                        If int is passed, cached file will be refreshed if the
+                        number of days since last modification exceeds that value.
+        :param view: Which view to use for the query, see `the documentation <https://dev.elsevier.com/sd_search_views.html>`__.
+                     Allowed values: `STANDARD`.
+        :param verbose: Whether to print a download progress bar.
+        :param download: Whether to download results (if they have not been
+                         cached).
+        :param integrity_fields: A list or tuple with the names of fields whose completeness should
+                                 be checked.  `ScienceDirectSearch` will perform the
+                                 action specified in `integrity_action` if
+                                 elements in these fields are missing.  This
+                                 helps to avoid idiosyncratically missing
+                                 elements that should always be present
+                                 (e.g., doi or authors).
+        :param integrity_action: What to do in case integrity of provided fields
+                                 cannot be verified.  Possible actions:
+                                 - `"raise"`: Raise an `AttributeError`
+                                 - `"warn"`: Raise a `UserWarning`
+        :param subscriber: Whether you access ScienceDirect with a subscription or not.
+                           For subscribers, ScienceDirect's cursor navigation will be
+                           used.  Sets the number of entries in each query
+                           iteration to the maximum number allowed by the
+                           corresponding view.
+        :param kwds: Keywords passed on as query parameters.  Must contain
+                     fields and values mentioned in the `API specification <https://dev.elsevier.com/tecdoc_sdsearch_migration.html>`__.
         
         Raises
         ------
@@ -121,6 +112,12 @@ def __init__(self,
         ValueError
             If any of the parameters `integrity_action`, `refresh` or `view`
             is not one of the allowed values.
+        
+        Notes
+        -----
+        The directory for cached results is `{path}/{view}/{fname}`,
+        where `path` is specified in your configuration file and `fname` is
+        the md5-hashed version of the flattened `query`.
 
         """
         # Check if the query and keyword arguments are empty

From fc6f15ea486f32dd305f67def4c01b71e4db270c Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Fri, 13 Jun 2025 17:20:58 +0200
Subject: [PATCH 8/9] Base: Put Science Direct search conditional second

---
 .../tests/test_ScienceDirectSearch.py         |  8 +--
 pybliometrics/superclasses/base.py            | 56 +++++++++----------
 2 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
index 3edb007f..7ec0673e 100644
--- a/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
+++ b/pybliometrics/sciencedirect/tests/test_ScienceDirectSearch.py
@@ -10,19 +10,19 @@
 
 sds_standard = ScienceDirectSearch(title='Assessing LLMs in malicious code deobfuscation of real-world malware campaigns',
                                    date='2024',
-                                   refresh=30)
+                                   refresh=True)
 
 sds_empty = ScienceDirectSearch(title='Not a realistic title',
                                 date='2012',
-                                view="STANDARD", refresh=30)
+                                view="STANDARD", refresh=True)
 
 sds_huge = ScienceDirectSearch('Neural Networks',
                                date='2015-2020',
-                               view="STANDARD", download=False, refresh=30)
+                               view="STANDARD", download=False, refresh=True)
 
 sds_pagination = ScienceDirectSearch('"Neural Networks" AND "Shapley"',
                                      date='2020',
-                                     view="STANDARD", refresh=30)
+                                     view="STANDARD", refresh=True)
 
 def test_all_fields():
     fields = 'authors doi loadDate openAccess first_page last_page pii publicationDate ' \
diff --git a/pybliometrics/superclasses/base.py b/pybliometrics/superclasses/base.py
index d5cb4285..58046788 100644
--- a/pybliometrics/superclasses/base.py
+++ b/pybliometrics/superclasses/base.py
@@ -68,34 +68,7 @@ def __init__(self,
             else:
                 self._json = loads(fname.read_text())
         else:
-            if sciencedirect_search:
-                resp = get_content(url, api, params, 'PUT' ,**kwds)
-                header = resp.headers
-                res = resp.json()
-                # Get the number of results
-                n = int(res.get('resultsFound', 0))
-                self._n = n
-                self._json = []
-                if download:
-                    if n > SCIENCE_DIRECT_MAX_ENTRIES:
-                        text = f'Found {n:,} matches.  The query fails to return '\
-                            f'more than {SCIENCE_DIRECT_MAX_ENTRIES} entries. Please '\
-                            'refine your query.'
-                        raise ScopusQueryError(text)
-                    data = res.get('results', [])
-                    n_chunks = ceil(n/params["display"]["show"])
-                    if verbose:
-                        print(f'Downloading results for query "{params}":')
-                    for i in tqdm(range(1, n_chunks), disable=not verbose):
-                        params['display']['offset'] += params["display"]["show"]
-                        resp = get_content(url, api, params, 'PUT' ,**kwds)
-                        res = resp.json()
-                        data.extend(res.get('results', []))
-                    header = resp.headers  # Use header of final call
-                    self._json = data
-                else:
-                    data = None
-            else:
+            if not sciencedirect_search:
                 resp = get_content(url, api, params, **kwds)
                 header = resp.headers
 
@@ -151,6 +124,33 @@ def __init__(self,
                     data = loads(resp.text)
                     self._json = data
                     data = [data]
+            else: # ScienceDirect Search API
+                resp = get_content(url, api, params, 'PUT' ,**kwds)
+                header = resp.headers
+                res = resp.json()
+                # Get the number of results
+                n = int(res.get('resultsFound', 0))
+                self._n = n
+                self._json = []
+                if download:
+                    if n > SCIENCE_DIRECT_MAX_ENTRIES:
+                        text = f'Found {n:,} matches.  The query fails to return '\
+                            f'more than {SCIENCE_DIRECT_MAX_ENTRIES} entries. Please '\
+                            'refine your query.'
+                        raise ScopusQueryError(text)
+                    data = res.get('results', [])
+                    n_chunks = ceil(n/params["display"]["show"])
+                    if verbose:
+                        print(f'Downloading results for query "{params}":')
+                    for i in tqdm(range(1, n_chunks), disable=not verbose):
+                        params['display']['offset'] += params["display"]["show"]
+                        resp = get_content(url, api, params, 'PUT' ,**kwds)
+                        res = resp.json()
+                        data.extend(res.get('results', []))
+                    header = resp.headers  # Use header of final call
+                    self._json = data
+                else:
+                    data = None
             # Set private variables
             self._mdate = time()
             self._header = header

From 921fa26ca1039e8a6ad026511bf2f852c463bcbd Mon Sep 17 00:00:00 2001
From: Nils Herrmann <nils18@live.com.mx>
Date: Fri, 13 Jun 2025 17:26:06 +0200
Subject: [PATCH 9/9] Undo last commit

---
 pybliometrics/superclasses/base.py | 56 +++++++++++++++---------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/pybliometrics/superclasses/base.py b/pybliometrics/superclasses/base.py
index 58046788..d5cb4285 100644
--- a/pybliometrics/superclasses/base.py
+++ b/pybliometrics/superclasses/base.py
@@ -68,7 +68,34 @@ def __init__(self,
             else:
                 self._json = loads(fname.read_text())
         else:
-            if not sciencedirect_search:
+            if sciencedirect_search:
+                resp = get_content(url, api, params, 'PUT' ,**kwds)
+                header = resp.headers
+                res = resp.json()
+                # Get the number of results
+                n = int(res.get('resultsFound', 0))
+                self._n = n
+                self._json = []
+                if download:
+                    if n > SCIENCE_DIRECT_MAX_ENTRIES:
+                        text = f'Found {n:,} matches.  The query fails to return '\
+                            f'more than {SCIENCE_DIRECT_MAX_ENTRIES} entries. Please '\
+                            'refine your query.'
+                        raise ScopusQueryError(text)
+                    data = res.get('results', [])
+                    n_chunks = ceil(n/params["display"]["show"])
+                    if verbose:
+                        print(f'Downloading results for query "{params}":')
+                    for i in tqdm(range(1, n_chunks), disable=not verbose):
+                        params['display']['offset'] += params["display"]["show"]
+                        resp = get_content(url, api, params, 'PUT' ,**kwds)
+                        res = resp.json()
+                        data.extend(res.get('results', []))
+                    header = resp.headers  # Use header of final call
+                    self._json = data
+                else:
+                    data = None
+            else:
                 resp = get_content(url, api, params, **kwds)
                 header = resp.headers
 
@@ -124,33 +151,6 @@ def __init__(self,
                     data = loads(resp.text)
                     self._json = data
                     data = [data]
-            else: # ScienceDirect Search API
-                resp = get_content(url, api, params, 'PUT' ,**kwds)
-                header = resp.headers
-                res = resp.json()
-                # Get the number of results
-                n = int(res.get('resultsFound', 0))
-                self._n = n
-                self._json = []
-                if download:
-                    if n > SCIENCE_DIRECT_MAX_ENTRIES:
-                        text = f'Found {n:,} matches.  The query fails to return '\
-                            f'more than {SCIENCE_DIRECT_MAX_ENTRIES} entries. Please '\
-                            'refine your query.'
-                        raise ScopusQueryError(text)
-                    data = res.get('results', [])
-                    n_chunks = ceil(n/params["display"]["show"])
-                    if verbose:
-                        print(f'Downloading results for query "{params}":')
-                    for i in tqdm(range(1, n_chunks), disable=not verbose):
-                        params['display']['offset'] += params["display"]["show"]
-                        resp = get_content(url, api, params, 'PUT' ,**kwds)
-                        res = resp.json()
-                        data.extend(res.get('results', []))
-                    header = resp.headers  # Use header of final call
-                    self._json = data
-                else:
-                    data = None
             # Set private variables
             self._mdate = time()
             self._header = header