diff --git a/README.md b/README.md index dccd92d..36cf65a 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,11 @@ instructions. WKP. Links are BROKEN for BNC, BNF, DBC, and NUKAT. For all other sources, the links will take you to the VIAF page.) +* Since early 2025, the VIAF API has rate limited its service more + aggressively. Be sure to set the VIAF options in the `conciliator.properties` + file to use a thread pool size of 1 and to add a delay after requests (the + sample configuration uses 200ms), to help prevent 429 response errors from VIAF. + ### ORCID * Uses the ORCID v2.1 API. The detailed search results of the v1.2 API diff --git a/conciliator.properties b/conciliator.properties index 24feac9..0f13112 100644 --- a/conciliator.properties +++ b/conciliator.properties @@ -52,3 +52,10 @@ cache.size=64MB # datasource.anothersolr.nametype.name=Person # datasource.anothersolr.url.query=http://SOME_OTHER_SOLR_INSTANCE:8983/solr/SOME_COLLECTION/select?wt=xml&q={{QUERY}}&rows={{ROWS}} # etc. + +#### VIAF + +## override default threadpool size for VIAF +datasource.viaf.threadpool.size=1 +## delay after every request, in milliseconds, to self-rate limit +datasource.viaf.delay=200 diff --git a/run_docker.sh b/run_docker.sh index e1d8b88..48ec794 100755 --- a/run_docker.sh +++ b/run_docker.sh @@ -8,4 +8,5 @@ docker run \ -p 127.0.0.1:8082:8082 \ -e TZ=`cat /etc/timezone` \ -v "$(pwd)/conciliator.log:/opt/conciliator/conciliator.log" \ + -v "$(pwd)/conciliator.properties:/opt/conciliator/conciliator.properties" \ conciliator:latest diff --git a/src/main/java/com/codefork/refine/Config.java b/src/main/java/com/codefork/refine/Config.java index b0e0635..25d0f14 100644 --- a/src/main/java/com/codefork/refine/Config.java +++ b/src/main/java/com/codefork/refine/Config.java @@ -19,6 +19,9 @@ public class Config { public static final String PROP_CACHE_TTL = "cache.ttl"; public static final String PROP_CACHE_SIZE = "cache.size"; + public static final String PROP_DATASOURCE_THREADPOOL_SIZE = 
"threadpool.size"; + public static final String PROP_DATASOURCE_DELAY = "delay"; + private static final String CONFIG_FILENAME = "conciliator.properties"; private Log log = LogFactory.getLog(Config.class); diff --git a/src/main/java/com/codefork/refine/datasource/WebServiceDataSource.java b/src/main/java/com/codefork/refine/datasource/WebServiceDataSource.java index 09b99e3..57716a6 100644 --- a/src/main/java/com/codefork/refine/datasource/WebServiceDataSource.java +++ b/src/main/java/com/codefork/refine/datasource/WebServiceDataSource.java @@ -257,27 +257,40 @@ private void updateStats(Collection results) throws Exception { * This is a "lower level" call than search(Map). * * @param query search to perform + * @param delay delay in milliseconds to sleep after a search query that was not served from the cache * @return list of search results (a 0-size list if none, or if errors occurred) */ - public List searchCheckCache(SearchQuery query) throws Exception { + public List searchCheckCache(SearchQuery query, int delay) throws Exception { + List results; + boolean retrievedFromCache = false; + if (isCacheEnabled()) { Cache cache = getCacheManager().getCache(Application.CACHE_DEFAULT); String key = getClass().getSimpleName() + "|" + query.getHashKey(); Cache.ValueWrapper value = cache.get(key); - List results; if(value != null) { log.info("Cache hit for: " + key); results = (List) value.get(); + retrievedFromCache = true; } else { results = search(query); cache.put(key, results); } - return results; + } else { + results = search(query); } - return search(query); + if(!retrievedFromCache && delay > 0) { + try { + Thread.sleep(delay); + } catch (InterruptedException e) { + getLog().error("sleep interrupted in WebServiceSearchTask"); + Thread.currentThread().interrupt(); // restore interrupt status so the calling thread pool can observe the interruption + } + } + return results; } /** diff --git a/src/main/java/com/codefork/refine/datasource/WebServiceSearchTask.java b/src/main/java/com/codefork/refine/datasource/WebServiceSearchTask.java index ce83c1f..74a41ce 100644 --- 
a/src/main/java/com/codefork/refine/datasource/WebServiceSearchTask.java +++ b/src/main/java/com/codefork/refine/datasource/WebServiceSearchTask.java @@ -19,11 +19,17 @@ public class WebServiceSearchTask implements SearchTask { private WebServiceDataSource dataSource; private String key; private SearchQuery searchQuery; + private int delay; - public WebServiceSearchTask(WebServiceDataSource dataSource, String key, SearchQuery searchQuery) { + public WebServiceSearchTask(WebServiceDataSource dataSource, String key, SearchQuery searchQuery, int delay) { this.key = key; this.searchQuery = searchQuery; this.dataSource = dataSource; + this.delay = delay; + } + + public WebServiceSearchTask(WebServiceDataSource dataSource, String key, SearchQuery searchQuery) { + this(dataSource, key, searchQuery, 0); } @Override @@ -42,7 +48,7 @@ public SearchResult call() { String key = getKey(); SearchQuery searchQuery = getSearchQuery(); try { - results = dataSource.searchCheckCache(searchQuery); + results = dataSource.searchCheckCache(searchQuery, delay); } catch (Exception e) { dataSource.getLog().error(String.format("error for query=%s", searchQuery.getQuery()), e); if (e.toString().contains("HTTP response code: 429")) { @@ -51,6 +57,7 @@ public SearchResult call() { return new SearchResult(key, SearchResult.ErrorType.UNKNOWN); } results.sort(BY_SCORE_REVERSED); + return new SearchResult(key, results); } diff --git a/src/main/java/com/codefork/refine/viaf/VIAF.java b/src/main/java/com/codefork/refine/viaf/VIAF.java index 664dc24..9df898a 100644 --- a/src/main/java/com/codefork/refine/viaf/VIAF.java +++ b/src/main/java/com/codefork/refine/viaf/VIAF.java @@ -6,6 +6,8 @@ import com.codefork.refine.ThreadPool; import com.codefork.refine.ThreadPoolFactory; import com.codefork.refine.datasource.ConnectionFactory; +import com.codefork.refine.datasource.SearchTask; +import com.codefork.refine.datasource.WebServiceSearchTask; import com.codefork.refine.datasource.stats.Stats; import 
com.codefork.refine.datasource.WebServiceDataSource; import com.codefork.refine.resources.NameType; @@ -47,15 +49,39 @@ public class VIAF extends WebServiceDataSource { private VIAFSource viafSource = null; private Map nonViafSources = new HashMap<>(); + private int delay = 0; + @Autowired public VIAF(Config config, CacheManager cacheManager, ThreadPoolFactory threadPoolFactory, ConnectionFactory connectionFactory, Stats stats) { super(config, cacheManager, threadPoolFactory, connectionFactory, stats); setCacheEnabled(true); + var dataSourceProperties = config.getDataSourceProperties("viaf"); + + var threadPoolSize = dataSourceProperties.getProperty(Config.PROP_DATASOURCE_THREADPOOL_SIZE); + if(threadPoolSize != null) { + getLog().info("Setting pool size for VIAF to " + threadPoolSize); + getThreadPool().setPoolSize(Integer.parseInt(threadPoolSize.strip())); + } + + var delay = dataSourceProperties.getProperty(Config.PROP_DATASOURCE_DELAY); + if(delay != null) { + getLog().info("Setting delay to " + delay + "ms"); + setDelay(Integer.parseInt(delay.strip())); + } + spf = SAXParserFactory.newInstance(); } + public int getDelay() { + return delay; + } + + public void setDelay(int delay) { + this.delay = delay; + } + /** * Factory method for getting a NonVIAFSource object */ @@ -115,6 +141,14 @@ public static String createCqlQueryString(SearchQuery searchQuery) { return cql; } + /** + * Override so we can add an optional delay to the task + **/ + @Override + public SearchTask createSearchTask(String key, SearchQuery searchQuery) { + return new WebServiceSearchTask(this, key, searchQuery, getDelay()); + } + /** * Does actual work of performing a search and parsing the XML. * @param query