Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ instructions.
WKP. Links are BROKEN for BNC, BNF, DBC, and NUKAT. For all other
sources, the links will take you to the VIAF page.)

* Since early 2025, the VIAF API has rate limited its service more
aggressively. Be sure to set the viaf options in the `conciliator.properties`
file to use a thread pool size of 1 and to add a delay after each request
(the sample file uses 200ms), in order to try to prevent 429 response errors
from VIAF.

### ORCID

* Uses the ORCID v2.1 API. The detailed search results of the v1.2 API
Expand Down
7 changes: 7 additions & 0 deletions conciliator.properties
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,10 @@ cache.size=64MB
# datasource.anothersolr.nametype.name=Person
# datasource.anothersolr.url.query=http://SOME_OTHER_SOLR_INSTANCE:8983/solr/SOME_COLLECTION/select?wt=xml&q={{QUERY}}&rows={{ROWS}}
# etc.

#### VIAF

## override default threadpool size for VIAF
datasource.viaf.threadpool.size=1
## delay, in milliseconds, after each request sent to VIAF (results served from the cache are not delayed), to self-rate limit
datasource.viaf.delay=200
1 change: 1 addition & 0 deletions run_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ docker run \
-p 127.0.0.1:8082:8082 \
-e TZ=`cat /etc/timezone` \
-v "$(pwd)/conciliator.log:/opt/conciliator/conciliator.log" \
-v "$(pwd)/conciliator.properties:/opt/conciliator/conciliator.properties" \
conciliator:latest
3 changes: 3 additions & 0 deletions src/main/java/com/codefork/refine/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ public class Config {
public static final String PROP_CACHE_TTL = "cache.ttl";
public static final String PROP_CACHE_SIZE = "cache.size";

/** Per-datasource property name for the thread pool size, e.g. {@code datasource.viaf.threadpool.size}. */
public static final String PROP_DATASOURCE_THREADPOOL_SIZE = "threadpool.size";
/** Per-datasource property name for the post-request delay in milliseconds, e.g. {@code datasource.viaf.delay}. */
public static final String PROP_DATASOURCE_DELAY = "delay";

private static final String CONFIG_FILENAME = "conciliator.properties";

private Log log = LogFactory.getLog(Config.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,27 +257,39 @@ private void updateStats(Collection<SearchResult> results) throws Exception {
* This is a "lower level" call than search(Map).
*
* @param query search to perform
* @param delay delay in milliseconds to sleep after a search query
* @return list of search results (a 0-size list if none, or if errors occurred)
*/
public List<Result> searchCheckCache(SearchQuery query) throws Exception {
public List<Result> searchCheckCache(SearchQuery query, int delay) throws Exception {
List<Result> results;
boolean retrievedFromCache = false;

if (isCacheEnabled()) {
Cache cache = getCacheManager().getCache(Application.CACHE_DEFAULT);

String key = getClass().getSimpleName() + "|" + query.getHashKey();
Cache.ValueWrapper value = cache.get(key);

List<Result> results;
if(value != null) {
log.info("Cache hit for: " + key);
results = (List<Result>) value.get();
retrievedFromCache = true;
} else {
results = search(query);
cache.put(key, results);
}
return results;
} else {
results = search(query);
}

return search(query);
if(!retrievedFromCache && delay > 0) {
try {
Thread.sleep(delay);
} catch (InterruptedException e) {
getLog().error("sleep interrupted in WebServiceSearchTask");
}
}
return results;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,17 @@ public class WebServiceSearchTask implements SearchTask {
private WebServiceDataSource dataSource;
private String key;
private SearchQuery searchQuery;
private int delay;

/**
 * Creates a task that performs no delay after its search.
 *
 * @param dataSource data source the search is run against
 * @param key key identifying this query in the returned SearchResult
 * @param searchQuery query to run
 */
public WebServiceSearchTask(WebServiceDataSource dataSource, String key, SearchQuery searchQuery) {
    this(dataSource, key, searchQuery, 0);
}

/**
 * Creates a task with an explicit delay.
 *
 * @param dataSource data source the search is run against
 * @param key key identifying this query in the returned SearchResult
 * @param searchQuery query to run
 * @param delay value handed to the data source's searchCheckCache call
 */
public WebServiceSearchTask(WebServiceDataSource dataSource, String key, SearchQuery searchQuery, int delay) {
    this.dataSource = dataSource;
    this.key = key;
    this.searchQuery = searchQuery;
    this.delay = delay;
}

@Override
Expand All @@ -42,7 +48,7 @@ public SearchResult call() {
String key = getKey();
SearchQuery searchQuery = getSearchQuery();
try {
results = dataSource.searchCheckCache(searchQuery);
results = dataSource.searchCheckCache(searchQuery, delay);
} catch (Exception e) {
dataSource.getLog().error(String.format("error for query=%s", searchQuery.getQuery()), e);
if (e.toString().contains("HTTP response code: 429")) {
Expand All @@ -51,6 +57,7 @@ public SearchResult call() {
return new SearchResult(key, SearchResult.ErrorType.UNKNOWN);
}
results.sort(BY_SCORE_REVERSED);

return new SearchResult(key, results);
}

Expand Down
34 changes: 34 additions & 0 deletions src/main/java/com/codefork/refine/viaf/VIAF.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import com.codefork.refine.ThreadPool;
import com.codefork.refine.ThreadPoolFactory;
import com.codefork.refine.datasource.ConnectionFactory;
import com.codefork.refine.datasource.SearchTask;
import com.codefork.refine.datasource.WebServiceSearchTask;
import com.codefork.refine.datasource.stats.Stats;
import com.codefork.refine.datasource.WebServiceDataSource;
import com.codefork.refine.resources.NameType;
Expand Down Expand Up @@ -47,15 +49,39 @@ public class VIAF extends WebServiceDataSource {
private VIAFSource viafSource = null;
private Map<String, NonVIAFSource> nonViafSources = new HashMap<>();

private int delay = 0;

/**
 * Builds the VIAF data source with caching enabled, then applies the
 * optional "viaf" datasource settings from the configuration: thread pool
 * size and per-request delay (milliseconds). Settings that are absent are
 * left at their defaults.
 */
@Autowired
public VIAF(Config config, CacheManager cacheManager, ThreadPoolFactory threadPoolFactory, ConnectionFactory connectionFactory, Stats stats) {
    super(config, cacheManager, threadPoolFactory, connectionFactory, stats);

    setCacheEnabled(true);

    var viafProps = config.getDataSourceProperties("viaf");

    // Optional override of the thread pool size.
    String poolSizeSetting = viafProps.getProperty(Config.PROP_DATASOURCE_THREADPOOL_SIZE);
    if (poolSizeSetting != null) {
        getLog().info("Setting pool size for VIAF to " + poolSizeSetting);
        var pool = getThreadPool();
        pool.setPoolSize(Integer.parseInt(poolSizeSetting.strip()));
    }

    // Optional delay after each request.
    String delaySetting = viafProps.getProperty(Config.PROP_DATASOURCE_DELAY);
    if (delaySetting != null) {
        getLog().info("Setting delay to " + delaySetting + "ms");
        setDelay(Integer.parseInt(delaySetting.strip()));
    }

    spf = SAXParserFactory.newInstance();
}

/**
 * @return the delay passed to search tasks created by this data source
 */
public int getDelay() {
return delay;
}

/**
 * @param delay the delay to pass to search tasks created by this data source
 */
public void setDelay(int delay) {
this.delay = delay;
}

/**
* Factory method for getting a NonVIAFSource object
*/
Expand Down Expand Up @@ -115,6 +141,14 @@ public static String createCqlQueryString(SearchQuery searchQuery) {
return cql;
}

/**
 * Creates the search task for a query, handing it this data source's
 * configured delay so the task can pause after the request.
 *
 * @param key key identifying the query
 * @param searchQuery query to run
 * @return a WebServiceSearchTask bound to this data source
 */
@Override
public SearchTask createSearchTask(String key, SearchQuery searchQuery) {
    int configuredDelay = getDelay();
    return new WebServiceSearchTask(this, key, searchQuery, configuredDelay);
}

/**
* Does actual work of performing a search and parsing the XML.
* @param query
Expand Down