20
20
#include "mongocrypt-crypto-private.h"
21
21
#include "mongocrypt-ctx-private.h"
22
22
#include "mongocrypt-endpoint-private.h"
23
+ #include "mongocrypt-kek-private.h"
23
24
#include "mongocrypt-kms-ctx-private.h"
24
25
#include "mongocrypt-log-private.h"
25
26
#include "mongocrypt-opts-private.h"
26
27
#include "mongocrypt-private.h"
27
28
#include "mongocrypt-status-private.h"
28
29
#include "mongocrypt-util-private.h"
29
30
#include "mongocrypt.h"
31
+ #include <bson/bson.h>
30
32
#include <kms_message/kms_azure_request.h>
31
33
#include <kms_message/kms_b64.h>
32
34
#include <kms_message/kms_gcp_request.h>
@@ -142,6 +144,9 @@ _init_common(mongocrypt_kms_ctx_t *kms, _mongocrypt_log_t *log, _kms_request_typ
142
144
kms -> status = mongocrypt_status_new ();
143
145
kms -> req_type = kms_type ;
144
146
_mongocrypt_buffer_init (& kms -> result );
147
+ kms -> sleep_usec = 0 ;
148
+ kms -> attempts = 0 ;
149
+ kms -> should_retry = false;
145
150
}
146
151
147
152
bool _mongocrypt_kms_ctx_init_aws_decrypt (mongocrypt_kms_ctx_t * kms ,
@@ -427,11 +432,21 @@ uint32_t mongocrypt_kms_ctx_bytes_needed(mongocrypt_kms_ctx_t *kms) {
427
432
if (!mongocrypt_status_ok (kms -> status ) || !_mongocrypt_buffer_empty (& kms -> result )) {
428
433
return 0 ;
429
434
}
435
+ if (kms -> should_retry ) {
436
+ return 0 ;
437
+ }
430
438
want_bytes = kms_response_parser_wants_bytes (kms -> parser , DEFAULT_MAX_KMS_BYTE_REQUEST );
431
439
BSON_ASSERT (want_bytes >= 0 );
432
440
return (uint32_t )want_bytes ;
433
441
}
434
442
443
+ int64_t mongocrypt_kms_ctx_usleep (mongocrypt_kms_ctx_t * kms ) {
444
+ if (!kms ) {
445
+ return 0 ;
446
+ }
447
+ return kms -> sleep_usec ;
448
+ }
449
+
435
450
static void
436
451
_handle_non200_http_status (int http_status , const char * body , size_t body_len , mongocrypt_status_t * status ) {
437
452
BSON_ASSERT_PARAM (body );
@@ -455,6 +470,55 @@ _handle_non200_http_status(int http_status, const char *body, size_t body_len, m
455
470
CLIENT_ERR ("Error in KMS response. HTTP status=%d. Response body=\n%s" , http_status , body );
456
471
}
457
472
473
/* Compute a randomized retry delay, in microseconds, for the given attempt.
 *
 * attempts is the 1-based attempt count and must be greater than 0 (asserted).
 * The un-jittered delay starts at 0.2 seconds, doubles for every attempt after
 * the first, and is capped at 20 seconds. The returned value is drawn from
 * [1, delay] using rand().
 *
 * The C library generator is seeded from the wall clock on the first call.
 * NOTE(review): the one-time seeding uses an unsynchronized static flag;
 * confirm that callers do not invoke this concurrently.
 */
static int64_t backoff_time_usec(int64_t attempts) {
    static bool seeded = false;
    if (!seeded) {
        srand((uint32_t)time(NULL));
        seeded = true;
    }

    /* Exponential backoff with jitter. */
    const int64_t base = 200000;  /* 0.2 seconds */
    const int64_t max = 20000000; /* 20 seconds */
    BSON_ASSERT(attempts > 0);

    /* Double once per prior attempt, but stop as soon as the cap is reached.
     * Computing base << (attempts - 1) directly would be undefined behavior
     * (shift width / signed overflow) for large attempt counts. */
    int64_t backoff = base;
    for (int64_t doubled = 1; doubled < attempts && backoff < max; doubled++) {
        backoff *= 2;
    }
    if (backoff > max) {
        backoff = max;
    }

    /* Full jitter: between 1 and current max, inclusive. Scaling by
     * RAND_MAX + 1.0 keeps the fraction strictly below 1, so the trailing + 1
     * can never push the result past `backoff`. */
    const double fraction = (double)rand() / ((double)RAND_MAX + 1.0);
    return (int64_t)(fraction * (double)backoff) + 1;
}
492
+
493
+ static bool should_retry_http (int http_status , _kms_request_type_t t ) {
494
+ static const int retryable_aws [] = {408 , 429 , 500 , 502 , 503 , 509 };
495
+ static const int retryable_azure [] = {408 , 429 , 500 , 502 , 503 , 504 };
496
+ if (t == MONGOCRYPT_KMS_AWS_ENCRYPT || t == MONGOCRYPT_KMS_AWS_DECRYPT ) {
497
+ for (size_t i = 0 ; i < sizeof (retryable_aws ) / sizeof (retryable_aws [0 ]); i ++ ) {
498
+ if (http_status == retryable_aws [i ]) {
499
+ return true;
500
+ }
501
+ }
502
+ } else if (t == MONGOCRYPT_KMS_AZURE_WRAPKEY || t == MONGOCRYPT_KMS_AZURE_UNWRAPKEY ) {
503
+ for (size_t i = 0 ; i < sizeof (retryable_azure ) / sizeof (retryable_azure [0 ]); i ++ ) {
504
+ if (http_status == retryable_azure [i ]) {
505
+ return true;
506
+ }
507
+ }
508
+ } else if (t == MONGOCRYPT_KMS_GCP_ENCRYPT || t == MONGOCRYPT_KMS_GCP_DECRYPT ) {
509
+ if (http_status == 408 || http_status == 429 || http_status / 500 == 1 ) {
510
+ return true;
511
+ }
512
+ }
513
+ return false;
514
+ }
515
+
516
+ static void set_retry (mongocrypt_kms_ctx_t * kms ) {
517
+ kms -> should_retry = true;
518
+ kms -> attempts ++ ;
519
+ kms -> sleep_usec = backoff_time_usec (kms -> attempts );
520
+ }
521
+
458
522
/* An AWS KMS context has received full response. Parse out the result or error.
459
523
*/
460
524
static bool _ctx_done_aws (mongocrypt_kms_ctx_t * kms , const char * json_field ) {
@@ -485,6 +549,21 @@ static bool _ctx_done_aws(mongocrypt_kms_ctx_t *kms, const char *json_field) {
485
549
}
486
550
body = kms_response_get_body (response , & body_len );
487
551
552
+ if (kms -> retry_enabled && should_retry_http (http_status , kms -> req_type )) {
553
+ if (kms -> attempts >= kms_max_attempts ) {
554
+ // Wrap error to indicate maximum retries occurred.
555
+ _handle_non200_http_status (http_status , body , body_len , status );
556
+ CLIENT_ERR ("KMS request failed after maximum of %d retries: %s" ,
557
+ kms_max_attempts ,
558
+ mongocrypt_status_message (status , NULL ));
559
+ goto fail ;
560
+ } else {
561
+ ret = true;
562
+ set_retry (kms );
563
+ goto fail ;
564
+ }
565
+ }
566
+
488
567
if (http_status != 200 ) {
489
568
_handle_non200_http_status (http_status , body , body_len , status );
490
569
goto fail ;
@@ -643,6 +722,21 @@ static bool _ctx_done_azure_wrapkey_unwrapkey(mongocrypt_kms_ctx_t *kms) {
643
722
}
644
723
body = kms_response_get_body (response , & body_len );
645
724
725
+ if (kms -> retry_enabled && should_retry_http (http_status , kms -> req_type )) {
726
+ if (kms -> attempts >= kms_max_attempts ) {
727
+ // Wrap error to indicate maximum retries occurred.
728
+ _handle_non200_http_status (http_status , body , body_len , status );
729
+ CLIENT_ERR ("KMS request failed after maximum of %d retries: %s" ,
730
+ kms_max_attempts ,
731
+ mongocrypt_status_message (status , NULL ));
732
+ goto fail ;
733
+ } else {
734
+ ret = true;
735
+ set_retry (kms );
736
+ goto fail ;
737
+ }
738
+ }
739
+
646
740
if (body_len == 0 ) {
647
741
CLIENT_ERR ("Empty KMS response. HTTP status=%d" , http_status );
648
742
goto fail ;
@@ -737,6 +831,21 @@ static bool _ctx_done_gcp(mongocrypt_kms_ctx_t *kms, const char *json_field) {
737
831
}
738
832
body = kms_response_get_body (response , & body_len );
739
833
834
+ if (kms -> retry_enabled && should_retry_http (http_status , kms -> req_type )) {
835
+ if (kms -> attempts >= kms_max_attempts ) {
836
+ // Wrap error to indicate maximum retries occurred.
837
+ _handle_non200_http_status (http_status , body , body_len , status );
838
+ CLIENT_ERR ("KMS request failed after maximum of %d retries: %s" ,
839
+ kms_max_attempts ,
840
+ mongocrypt_status_message (status , NULL ));
841
+ goto fail ;
842
+ } else {
843
+ ret = true;
844
+ set_retry (kms );
845
+ goto fail ;
846
+ }
847
+ }
848
+
740
849
if (http_status != 200 ) {
741
850
_handle_non200_http_status (http_status , body , body_len , status );
742
851
goto fail ;
@@ -995,6 +1104,53 @@ static bool _ctx_done_kmip_decrypt(mongocrypt_kms_ctx_t *kms_ctx) {
995
1104
return ret ;
996
1105
}
997
1106
1107
+ bool mongocrypt_kms_ctx_fail (mongocrypt_kms_ctx_t * kms ) {
1108
+ if (!kms || !kms -> retry_enabled ) {
1109
+ return false;
1110
+ }
1111
+
1112
+ kms -> should_retry = false;
1113
+ mongocrypt_status_t * status = kms -> status ;
1114
+
1115
+ if (!kms -> retry_enabled ) {
1116
+ CLIENT_ERR ("KMS request failed due to network error" );
1117
+ return false;
1118
+ }
1119
+
1120
+ if (kms -> attempts >= kms_max_attempts ) {
1121
+ CLIENT_ERR ("KMS request failed after %d retries due to a network error" , kms_max_attempts );
1122
+ return false;
1123
+ }
1124
+
1125
+ // Check if request type is retryable. Some requests are non-idempotent and cannot be safely retried.
1126
+ _kms_request_type_t retryable_types [] = {MONGOCRYPT_KMS_AWS_ENCRYPT ,
1127
+ MONGOCRYPT_KMS_AWS_DECRYPT ,
1128
+ MONGOCRYPT_KMS_AZURE_WRAPKEY ,
1129
+ MONGOCRYPT_KMS_AZURE_UNWRAPKEY ,
1130
+ MONGOCRYPT_KMS_GCP_ENCRYPT ,
1131
+ MONGOCRYPT_KMS_GCP_DECRYPT };
1132
+ bool is_retryable = false;
1133
+ for (size_t i = 0 ; i < sizeof (retryable_types ) / sizeof (retryable_types [0 ]); i ++ ) {
1134
+ if (retryable_types [i ] == kms -> req_type ) {
1135
+ is_retryable = true;
1136
+ break ;
1137
+ }
1138
+ }
1139
+ if (!is_retryable ) {
1140
+ CLIENT_ERR ("KMS request failed due to network error" );
1141
+ return false;
1142
+ }
1143
+
1144
+ // Mark KMS context as retryable. Return again in `mongocrypt_ctx_next_kms_ctx`.
1145
+ set_retry (kms );
1146
+
1147
+ // Reset intermediate state of parser.
1148
+ if (kms -> parser ) {
1149
+ kms_response_parser_reset (kms -> parser );
1150
+ }
1151
+ return true;
1152
+ }
1153
+
998
1154
bool mongocrypt_kms_ctx_feed (mongocrypt_kms_ctx_t * kms , mongocrypt_binary_t * bytes ) {
999
1155
if (!kms ) {
1000
1156
return false;
0 commit comments