9696import org .opensearch .threadpool .ThreadPool ;
9797import org .opensearch .transport .client .Client ;
9898
99+ import static org .opensearch .security .support .ConfigConstants .DEFAULT_TIMEOUT ;
99100import static org .opensearch .security .support .ConfigConstants .SECURITY_ALLOW_DEFAULT_INIT_USE_CLUSTER_STATE ;
100101import static org .opensearch .security .support .SnapshotRestoreHelper .isSecurityIndexRestoredFromSnapshot ;
101102
102103public class ConfigurationRepository implements ClusterStateListener , IndexEventListener {
103104 private static final Logger LOGGER = LogManager .getLogger (ConfigurationRepository .class );
105+ private static final int INITIALIZATION_RETRY_INTERVAL_MS = 3000 ;
106+ private static final int INITIALIZATION_MAX_DURATION_MS = 5 * 60 * 1000 ;
107+ private static final int MAX_RETRIES = INITIALIZATION_MAX_DURATION_MS / INITIALIZATION_RETRY_INTERVAL_MS ;
104108
105109 private final String securityIndex ;
106110 private final Client client ;
@@ -273,26 +277,33 @@ private void initalizeClusterConfiguration(final boolean installDefaultConfig) {
273277 LOGGER .error ("{} does not exist" , confFile .getAbsolutePath ());
274278 }
275279 } catch (Exception e ) {
276- LOGGER .error ("Cannot apply default config (this is maybe not an error!)" , e );
280+ LOGGER .error ("Cannot apply default config" , e );
281+ throw e ;
277282 }
278283 }
279284
280- while ( !dynamicConfigFactory .isInitialized ()) {
285+ for ( int retryCount = 0 ; retryCount < MAX_RETRIES && !dynamicConfigFactory .isInitialized (); retryCount ++ ) {
281286 try {
282- LOGGER .debug ("Try to load config ..." );
287+ LOGGER .info ("Try to load config ..." );
283288 reloadConfiguration (CType .values (), true );
284289 break ;
285290 } catch (Exception e ) {
286- LOGGER .debug ("Unable to load configuration due to {}" , String .valueOf (ExceptionUtils .getRootCause (e )));
291+ LOGGER .error ("Unable to load configuration due to {}" , String .valueOf (ExceptionUtils .getRootCause (e )));
287292 try {
288- TimeUnit .MILLISECONDS .sleep (3000 );
293+ TimeUnit .MILLISECONDS .sleep (INITIALIZATION_RETRY_INTERVAL_MS );
289294 } catch (InterruptedException e1 ) {
290295 Thread .currentThread ().interrupt ();
291- LOGGER .debug ("Thread was interrupted so we cancel initialization" );
296+ LOGGER .error ("Thread was interrupted so we cancel initialization" );
292297 break ;
293298 }
294299 }
295300 }
301+
302+ if (!dynamicConfigFactory .isInitialized ()) {
303+ LOGGER .error ("Node '{}' failed to initialize" , clusterService .localNode ().getName ());
304+ throw new IllegalStateException (String .format ("Node '%s' failed to initialize" , clusterService .localNode ().getName ()));
305+ }
306+
296307 setupAuditConfigurationIfAny (cl .isAuditConfigDocPresentInIndex ());
297308 LOGGER .info ("Node '{}' initialized" , clusterService .localNode ().getName ());
298309
@@ -325,7 +336,8 @@ private void setupAuditConfigurationIfAny(final boolean auditConfigDocPresent) {
325336 private boolean createSecurityIndexIfAbsent () {
326337 try {
327338 final Map <String , Object > indexSettings = ImmutableMap .of ("index.number_of_shards" , 1 , "index.auto_expand_replicas" , "0-all" );
328- final CreateIndexRequest createIndexRequest = new CreateIndexRequest (securityIndex ).settings (indexSettings );
339+ final CreateIndexRequest createIndexRequest = new CreateIndexRequest (securityIndex ).timeout (DEFAULT_TIMEOUT )
340+ .settings (indexSettings );
329341 final boolean ok = client .admin ().indices ().create (createIndexRequest ).actionGet ().isAcknowledged ();
330342 LOGGER .info ("Index {} created?: {}" , securityIndex , ok );
331343 return ok ;
@@ -341,14 +353,14 @@ private void waitForSecurityIndexToBeAtLeastYellow() {
341353 try {
342354 response = client .admin ()
343355 .cluster ()
344- .health (new ClusterHealthRequest (securityIndex ).waitForActiveShards (1 ).waitForYellowStatus ())
356+ .health (new ClusterHealthRequest (securityIndex ).waitForActiveShards (1 ).waitForYellowStatus (). timeout ( DEFAULT_TIMEOUT ) )
345357 .actionGet ();
346358 } catch (Exception e ) {
347- LOGGER .debug ("Caught a {} but we just try again ..." , e .toString ());
359+ LOGGER .error ("Caught a {} but we just try again ..." , e .toString ());
348360 }
349361
350362 while (response == null || response .isTimedOut () || response .getStatus () == ClusterHealthStatus .RED ) {
351- LOGGER .debug (
363+ LOGGER .error (
352364 "index '{}' not healthy yet, we try again ... (Reason: {})" ,
353365 securityIndex ,
354366 response == null ? "no response" : (response .isTimedOut () ? "timeout" : "other, maybe red cluster" )
@@ -360,9 +372,12 @@ private void waitForSecurityIndexToBeAtLeastYellow() {
360372 Thread .currentThread ().interrupt ();
361373 }
362374 try {
363- response = client .admin ().cluster ().health (new ClusterHealthRequest (securityIndex ).waitForYellowStatus ()).actionGet ();
375+ response = client .admin ()
376+ .cluster ()
377+ .health (new ClusterHealthRequest (securityIndex ).waitForYellowStatus ().timeout (DEFAULT_TIMEOUT ))
378+ .actionGet ();
364379 } catch (Exception e ) {
365- LOGGER .debug ("Caught again a {} but we just try again ..." , e .toString ());
380+ LOGGER .error ("Caught again a {} but we just try again ..." , e .toString ());
366381 }
367382 }
368383 }
0 commit comments