@@ -61,6 +61,7 @@ function RollbackTest(name = "RollbackTest", replSet) {
6161 const SIGTERM = 15 ;
6262 const kNumDataBearingNodes = 3 ;
6363 const kElectableNodes = 2 ;
64+ const kForeverSecs = 24 * 60 * 60 ;
6465
6566 let rst ;
6667 let curPrimary ;
@@ -236,6 +237,9 @@ function RollbackTest(name = "RollbackTest", replSet) {
236237 `may prevent a rollback here.` ) ;
237238 }
238239
240+ // Unfreeze the node if it was previously frozen, so that it can run for the election.
241+ assert . commandWorked ( curSecondary . adminCommand ( { replSetFreeze : 0 } ) ) ;
242+
239243 // Ensure that the tiebreaker node is connected to the other nodes. We must do this after
240244 // we are sure that rollback has completed on the rollback node.
241245 tiebreakerNode . reconnect ( [ curPrimary , curSecondary ] ) ;
@@ -369,6 +373,12 @@ function RollbackTest(name = "RollbackTest", replSet) {
369373 this . transitionToSyncSourceOperationsDuringRollback = function ( ) {
370374 transitionIfAllowed ( State . kSyncSourceOpsDuringRollback ) ;
371375
376+ // If the nodes are restarted after the rollback node has rolled back successfully and
377+ // caught up to curPrimary's oplog, then the rollback node can become the new primary.
378+ // That can lead to unplanned state transitions, like an unconditional step down, during
379+ // the test. To avoid those problems, prevent the rollback node from starting an election.
380+ assert . commandWorked ( curSecondary . adminCommand ( { replSetFreeze : kForeverSecs } ) ) ;
381+
372382 log ( `Reconnecting the secondary ${ curSecondary . host } so it'll go into rollback` ) ;
373383 // Reconnect the rollback node to the current primary, which is the node we want to sync
374384 // from. If we reconnect to both the current primary and the tiebreaker node, the rollback
@@ -431,9 +441,39 @@ function RollbackTest(name = "RollbackTest", replSet) {
431441 log ( `Restarting node ${ hostName } ` ) ;
432442 rst . start ( nodeId , startOptions , true /* restart */ ) ;
433443
434- // Ensure that the primary is ready to take operations before continuing. If both nodes are
435- // connected to the tiebreaker node, the primary may switch.
444+ // Freeze the node if the restarted node is the rollback node.
445+ if ( curState === State . kSyncSourceOpsDuringRollback &&
446+ rst . getNodeId ( curSecondary ) === nodeId ) {
447+ assert . soon ( ( ) => {
448+ // Try stepping down the rollback node if it became the primary after its
449+ // restart, as it might have caught up with the original primary.
450+ curSecondary . adminCommand ( { "replSetStepDown" : kForeverSecs , "force" : true } ) ;
451+ try {
452+ // Prevent the rollback node from running for election. There is a chance that
453+ // this node started an election or became primary after the
454+ // 'replSetStepDown' cmd, so the 'replSetFreeze' cmd can fail.
455+ assert . commandWorked (
456+ curSecondary . adminCommand ( { "replSetFreeze" : kForeverSecs } ) ) ;
457+ return true ;
458+ } catch ( e ) {
459+ if ( e . code === ErrorCodes . NotSecondary ) {
460+ return false ;
461+ }
462+ throw e ;
463+ }
464+ } , `Failed to run replSetFreeze cmd on ${ curSecondary . host } ` ) ;
465+ }
466+
467+ const oldPrimary = curPrimary ;
468+ // Wait for the new primary to be elected and ready to take operations before continuing.
436469 curPrimary = rst . getPrimary ( ) ;
470+
471+ // The primary can change after a node restart only if all 3 nodes are connected to each
472+ // other.
473+ if ( curState !== State . kSteadyStateOps ) {
474+ assert . eq ( curPrimary , oldPrimary ) ;
475+ }
476+
437477 curSecondary = rst . getSecondary ( ) ;
438478 assert . neq ( curPrimary , curSecondary ) ;
439479 } ;
0 commit comments