Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -862,17 +862,14 @@ public boolean moveShards() {
for (var storedShardMovement : bestNonPreferredShardMovementsTracker.getBestShardMovements()) {
final var shardRouting = storedShardMovement.shardRouting();
final var index = projectIndex(shardRouting);
// If `shardMoved` is true, there may have been moves that have made our previous move decision
// invalid, so we must call `decideMove` again. If not, we know we haven't made any moves, and we
// can use the cached decision.
final var moveDecision = shardMoved ? decideMove(index, shardRouting) : storedShardMovement.moveDecision();
final var moveDecision = reassessDecisionIfRequired(index, storedShardMovement, shardMoved);
if (moveDecision.isDecisionTaken() && moveDecision.cannotRemainAndCanMove()) {
if (notPreferredLogger.isDebugEnabled()) {
notPreferredLogger.debug(
"Moving shard [{}] to [{}] from a NOT_PREFERRED allocation, explanation is [{}]",
"Moving shard [{}] to [{}] from a NOT_PREFERRED allocation: {}",
shardRouting,
moveDecision.getTargetNode().getName(),
moveDecision.getCanRemainDecision().getExplanation()
moveDecision.getCanRemainDecision()
);
}
executeMove(shardRouting, index, moveDecision, "move-non-preferred");
Expand All @@ -886,6 +883,36 @@ public boolean moveShards() {
return shardMoved;
}

/**
* If a shard has moved we need to re-run the decision to check that it's still valid. If {@link #notPreferredLogger} is
* set to debug, we re-run the decision with explain turned on so we can populate the explanation in our log message
*
* @param index The index of the shard
* @param storedShardMovement The existing shard movement decision
* @param shardMoved True if a shard moved in this balancing round, false otherwise
* @return The move decision to act on
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found

"If a shard has moved we need to re-run the decision to check that it's still valid."

to be difficult to follow. Could it be replaced with something like

"If any shards in the cluster have been relocated since the stored shard movement was calculated, the move decision must be re-run to ensure it is still allowed."

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improved(?) in c9a8751

*/
private MoveDecision reassessDecisionIfRequired(
ProjectIndex index,
BestShardMovementsTracker.StoredShardMovement storedShardMovement,
boolean shardMoved
) {
final boolean needToReassess = notPreferredLogger.isDebugEnabled() || shardMoved;
if (needToReassess == false) {
return storedShardMovement.moveDecision();
}

final var oldDebugMode = allocation.getDebugMode();
if (notPreferredLogger.isDebugEnabled()) {
allocation.setDebugMode(RoutingAllocation.DebugMode.EXCLUDE_YES_DECISIONS);
}
Comment on lines +916 to +918
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (notPreferredLogger.isDebugEnabled()) {
allocation.setDebugMode(RoutingAllocation.DebugMode.EXCLUDE_YES_DECISIONS);
}
allocation.setDebugMode(notPreferredLogger.isDebugEnabled() ? RoutingAllocation.DebugMode.EXCLUDE_YES_DECISIONS : oldDebugMode);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left this one because I don't think either form is significantly more or less readable, and it seems weird to set the value if it's not being changed.

try {
return decideMove(index, storedShardMovement.shardRouting());
} finally {
allocation.setDebugMode(oldDebugMode);
}
}

private void executeMove(ShardRouting shardRouting, ProjectIndex index, MoveDecision moveDecision, String reason) {
final ModelNode sourceNode = nodes.get(shardRouting.currentNodeId());
final ModelNode targetNode = nodes.get(moveDecision.getTargetNode().getId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,7 @@ public Decision canRemain(
RoutingNode node,
RoutingAllocation allocation
) {
return new Decision.Single(Decision.Type.NOT_PREFERRED, "test_decider", "Always NOT_PREFERRED");
return allocation.decision(Decision.NOT_PREFERRED, "test_decider", "Always NOT_PREFERRED");
}
})), clusterState.getRoutingNodes().mutableCopy(), clusterState, ClusterInfo.EMPTY, SnapshotShardSizeInfo.EMPTY, 0L);

Expand All @@ -1039,7 +1039,7 @@ public Decision canRemain(
"moved a NOT_PREFERRED allocation",
notPreferredLoggerName,
Level.DEBUG,
"Moving shard [*] to [*] from a NOT_PREFERRED allocation, explanation is [Always NOT_PREFERRED]"
"Moving shard [*] to [*] from a NOT_PREFERRED allocation: [NOT_PREFERRED(Always NOT_PREFERRED)]"
)
);
}
Expand Down