From da19785d900673fbc59a319a161a5f0f840f8123 Mon Sep 17 00:00:00 2001 From: Erwin Pe Date: Tue, 28 Jan 2025 17:58:36 +0000 Subject: [PATCH 1/8] MONGOCRYPT-762 Generate text search token sets from StrEncode output --- src/mongocrypt-buffer-private.h | 6 + src/mongocrypt-buffer.c | 29 ++++- src/mongocrypt-marking.c | 171 ++++++++++++++++---------- test/test-mongocrypt-buffer.c | 24 ++++ test/test-mongocrypt-marking.c | 205 ++++++++++++++++++++++++++++++-- 5 files changed, 360 insertions(+), 75 deletions(-) diff --git a/src/mongocrypt-buffer-private.h b/src/mongocrypt-buffer-private.h index 18a604777..127b91846 100644 --- a/src/mongocrypt-buffer-private.h +++ b/src/mongocrypt-buffer-private.h @@ -162,4 +162,10 @@ bool _mongocrypt_buffer_from_subrange(_mongocrypt_buffer_t *out, uint32_t offset, uint32_t len) MONGOCRYPT_WARN_UNUSED_RESULT; +/* _mongocrypt_buffer_copy_from_string_as_bson_value initializes @out, wraps the provided string + * into a BSON value, and copies the BSON value to @out. No BSON validation is performed on @str. + * Caller must call _mongocrypt_buffer_cleanup. + */ +void _mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_buffer_t *out, const char *str, int len); + #endif /* MONGOCRYPT_BUFFER_H */ diff --git a/src/mongocrypt-buffer.c b/src/mongocrypt-buffer.c index fb872d5ce..846962252 100644 --- a/src/mongocrypt-buffer.c +++ b/src/mongocrypt-buffer.c @@ -317,7 +317,10 @@ bool _mongocrypt_buffer_to_bson_value(_mongocrypt_buffer_t *plaintext, uint8_t t return ret; } -void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *iter) { +static void _mongocrypt_buffer_copy_as_bson_value(_mongocrypt_buffer_t *plaintext, + bool (*append_func)(bson_t *bson, const void *data, int len), + const void *data, + int len) { bson_t wrapper = BSON_INITIALIZER; int32_t offset = INT32_LEN /* skips document size */ + TYPE_LEN /* element type */ @@ -326,13 +329,14 @@ void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t * uint8_t *wrapper_data; BSON_ASSERT_PARAM(plaintext); - BSON_ASSERT_PARAM(iter); + BSON_ASSERT_PARAM(append_func); /* It is not straightforward to transform a bson_value_t to a string of * bytes. As a workaround, we wrap the value in a bson document with an empty * key, then use the raw buffer from inside the new bson_t, skipping the * length and type header information and the key name. */ - bson_append_iter(&wrapper, "", 0, iter); + append_func(&wrapper, data, len); + wrapper_data = ((uint8_t *)bson_get_data(&wrapper)); BSON_ASSERT(wrapper.len >= (uint32_t)offset + NULL_BYTE_LEN); plaintext->len = wrapper.len - (uint32_t)offset - NULL_BYTE_LEN; /* the final null byte */ @@ -345,6 +349,25 @@ void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t * bson_destroy(&wrapper); } +bool append_iter(bson_t *bson, const void *iter, int len) { + return bson_append_iter(bson, "", 0, (const bson_iter_t *)iter); +} + +bool append_utf8(bson_t *bson, const void *str, int len) { + return bson_append_utf8(bson, "", 0, (const char *)str, len); +} + +void _mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_buffer_t *plaintext, const char *str, int len) { + BSON_ASSERT_PARAM(str); + BSON_ASSERT(len >= 0); + _mongocrypt_buffer_copy_as_bson_value(plaintext, append_utf8, str, len); +} + +void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *iter) { + BSON_ASSERT_PARAM(iter); + _mongocrypt_buffer_copy_as_bson_value(plaintext, append_iter, iter, 0); +} + bool _mongocrypt_buffer_from_uuid_iter(_mongocrypt_buffer_t *buf, bson_iter_t *iter) { const uint8_t *data; bson_subtype_t subtype; diff --git a/src/mongocrypt-marking.c b/src/mongocrypt-marking.c index ba8ebbb43..76b92ea35 100644 --- a/src/mongocrypt-marking.c +++ b/src/mongocrypt-marking.c @@ -29,6 +29,8 @@ #include "mc-range-edge-generation-private.h" #include "mc-range-encoding-private.h" #include "mc-range-mincover-private.h" +#include "mc-str-encode-string-sets-private.h" +#include "mc-text-search-str-encode-private.h" #include "mc-tokens-private.h" #include "mongocrypt-buffer-private.h" #include "mongocrypt-ciphertext-private.h" @@ -1126,26 +1128,22 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, mc_FLE2InsertUpdatePayloadV2_t *payload, const _mongocrypt_buffer_t *indexKeyId, const mc_FLE2TextSearchInsertSpec_t *spec, - const _mongocrypt_buffer_t *value, int64_t contentionFactor, mongocrypt_status_t *status) { BSON_ASSERT_PARAM(kb); BSON_ASSERT_PARAM(payload); BSON_ASSERT_PARAM(indexKeyId); BSON_ASSERT_PARAM(spec); - BSON_ASSERT_PARAM(value); _mongocrypt_crypto_t *crypto = kb->crypt->crypto; mc_TextSearchTokenSets_t *tsts = &payload->textSearchTokenSets.tsts; _FLE2EncryptedPayloadCommon_t common = {{0}}; bool res = false; - // TODO MONGOCRYPT-759 implement case folding; for now let foldedValue be a copy of value. - _mongocrypt_buffer_t foldedValue = {0}; - _mongocrypt_buffer_init(&foldedValue); - _mongocrypt_buffer_copy_to(value, &foldedValue); - - // TODO MONGOCRYPT-762 do StrEncode here to get substring sets to encode + mc_str_encode_sets_t *encodeSets = mc_text_search_str_encode(spec, status); + if (!encodeSets) { + goto fail; + } // Start the token derivations if (!_get_tokenKey(kb, indexKeyId, &common.tokenKey, status)) { @@ -1164,72 +1162,126 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, goto fail; } - if (!_fle2_generate_TextExactTokenSet(kb, - &tsts->exact, - &foldedValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { - goto fail; + // Generate exact token set singleton + { + _mongocrypt_buffer_t asBsonValue; + _mongocrypt_buffer_init(&asBsonValue); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, + (const char *)encodeSets->exact.data, + encodeSets->exact.len); + if (!_fle2_generate_TextExactTokenSet(kb, + &tsts->exact, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { + goto fail; + } + _mongocrypt_buffer_cleanup(&asBsonValue); } + const char *substring; + uint32_t bytelen; + uint32_t appendCount; + + // Generate array of substring token sets if (spec->substr.set) { - // TODO MONGOCRYPT-762 iterate on StrEncode substrings set - mc_TextSubstringTokenSet_t substrSet = {{0}}; - mc_TextSubstringTokenSet_init(&substrSet); - - if (!_fle2_generate_TextSubstringTokenSet(kb, - &substrSet, - &foldedValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { - mc_TextSubstringTokenSet_cleanup(&substrSet); - goto fail; + mc_substring_set_iter_t set_itr; + mc_substring_set_iter_init(&set_itr, encodeSets->substring_set); + + while (mc_substring_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) { + for (; appendCount > 0; appendCount--) { + mc_TextSubstringTokenSet_t tset = {{0}}; + mc_TextSubstringTokenSet_init(&tset); + + _mongocrypt_buffer_t asBsonValue; + _mongocrypt_buffer_init(&asBsonValue); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, bytelen); + + if (!_fle2_generate_TextSubstringTokenSet(kb, + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { + _mongocrypt_buffer_cleanup(&asBsonValue); + mc_TextSubstringTokenSet_cleanup(&tset); + goto fail; + } + _mc_array_append_val(&tsts->substringArray, tset); // moves ownership of tset + _mongocrypt_buffer_cleanup(&asBsonValue); + } } - _mc_array_append_val(&tsts->substringArray, substrSet); } + + // Generate array of suffix token sets if (spec->suffix.set) { - // TODO MONGOCRYPT-762 iterate on StrEncode suffixes set - mc_TextSuffixTokenSet_t suffixSet = {{0}}; - mc_TextSuffixTokenSet_init(&suffixSet); - - if (!_fle2_generate_TextSuffixTokenSet(kb, - &suffixSet, - &foldedValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { - mc_TextSuffixTokenSet_cleanup(&suffixSet); - goto fail; + mc_affix_set_iter_t set_itr; + mc_affix_set_iter_init(&set_itr, encodeSets->suffix_set); + + while (mc_affix_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) { + for (; appendCount > 0; appendCount--) { + mc_TextSuffixTokenSet_t tset = {{0}}; + mc_TextSuffixTokenSet_init(&tset); + + _mongocrypt_buffer_t asBsonValue; + _mongocrypt_buffer_init(&asBsonValue); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, bytelen); + + if (!_fle2_generate_TextSuffixTokenSet(kb, + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { + _mongocrypt_buffer_cleanup(&asBsonValue); + mc_TextSuffixTokenSet_cleanup(&tset); + goto fail; + } + _mc_array_append_val(&tsts->suffixArray, tset); // moves ownership of tset + _mongocrypt_buffer_cleanup(&asBsonValue); + } } - _mc_array_append_val(&tsts->suffixArray, suffixSet); } + + // Generate array of prefix token sets if (spec->prefix.set) { - // TODO MONGOCRYPT-762 iterate on StrEncode suffixes set - mc_TextPrefixTokenSet_t prefixSet = {{0}}; - mc_TextPrefixTokenSet_init(&prefixSet); - - if (!_fle2_generate_TextPrefixTokenSet(kb, - &prefixSet, - &foldedValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { - mc_TextPrefixTokenSet_cleanup(&prefixSet); - goto fail; + mc_affix_set_iter_t set_itr; + mc_affix_set_iter_init(&set_itr, encodeSets->prefix_set); + + while (mc_affix_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) { + for (; appendCount > 0; appendCount--) { + mc_TextPrefixTokenSet_t tset = {{0}}; + mc_TextPrefixTokenSet_init(&tset); + + _mongocrypt_buffer_t asBsonValue; + _mongocrypt_buffer_init(&asBsonValue); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, bytelen); + + if (!_fle2_generate_TextPrefixTokenSet(kb, + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { + _mongocrypt_buffer_cleanup(&asBsonValue); + mc_TextPrefixTokenSet_cleanup(&tset); + goto fail; + } + _mc_array_append_val(&tsts->prefixArray, tset); // moves ownership of tset + _mongocrypt_buffer_cleanup(&asBsonValue); + } } - _mc_array_append_val(&tsts->prefixArray, prefixSet); } payload->textSearchTokenSets.set = true; res = true; fail: _FLE2EncryptedPayloadCommon_cleanup(&common); - _mongocrypt_buffer_cleanup(&foldedValue); + mc_str_encode_sets_destroy(encodeSets); return res; } @@ -1350,7 +1402,6 @@ static bool _mongocrypt_fle2_placeholder_to_insert_update_ciphertextForTextSearc &payload, &placeholder->index_key_id, &insertSpec, - &value, payload.contentionFactor, status)) { goto fail; diff --git a/test/test-mongocrypt-buffer.c b/test/test-mongocrypt-buffer.c index bb1bf441b..c931eb1ce 100644 --- a/test/test-mongocrypt-buffer.c +++ b/test/test-mongocrypt-buffer.c @@ -16,6 +16,7 @@ #include +#include "mongocrypt-buffer-private.h" #include "test-mongocrypt-assert.h" #include "test-mongocrypt.h" @@ -232,6 +233,28 @@ static void _test_mongocrypt_buffer_from_subrange(_mongocrypt_tester_t *tester) _mongocrypt_buffer_cleanup(&input); } +static void _test_mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_tester_t *tester) { + _mongocrypt_buffer_t buf; + _mongocrypt_buffer_t expectedLenBuf; + const char *data = "foobar"; + + // expect output to contain 4-byte length + data + null string terminator + uint32_t expectedLen = sizeof(int32_t) + strlen(data) + sizeof(uint8_t); + _mongocrypt_buffer_copy_from_hex(&expectedLenBuf, "07000000"); + + _mongocrypt_buffer_copy_from_string_as_bson_value(&buf, data, strlen(data)); + ASSERT(buf.len == expectedLen); + + // check 4-byte length + ASSERT_CMPBYTES(expectedLenBuf.data, expectedLenBuf.len, buf.data, expectedLenBuf.len); + // check data + null byte + ASSERT_CMPBYTES((const uint8_t *)data, + strlen(data) + 1, + buf.data + expectedLenBuf.len, + buf.len - expectedLenBuf.len); + _mongocrypt_buffer_cleanup(&expectedLenBuf); +} + void _mongocrypt_tester_install_buffer(_mongocrypt_tester_t *tester) { INSTALL_TEST(_test_mongocrypt_buffer_from_iter); INSTALL_TEST(_test_mongocrypt_buffer_copy_from_data_and_size); @@ -239,4 +262,5 @@ void _mongocrypt_tester_install_buffer(_mongocrypt_tester_t *tester) { INSTALL_TEST(_test_mongocrypt_buffer_steal_from_string); INSTALL_TEST(_test_mongocrypt_buffer_copy_from_uint64_le); INSTALL_TEST(_test_mongocrypt_buffer_from_subrange); + INSTALL_TEST(_test_mongocrypt_buffer_copy_from_string_as_bson_value); } diff --git a/test/test-mongocrypt-marking.c b/test/test-mongocrypt-marking.c index ab472198c..19e305fed 100644 --- a/test/test-mongocrypt-marking.c +++ b/test/test-mongocrypt-marking.c @@ -26,6 +26,7 @@ #include "test-mongocrypt-assert.h" #include "test-mongocrypt.h" #include +#include /* Create a basis marking buffer with valid values for the given fields. */ static void _make_marking(bson_t *bson, _mongocrypt_buffer_t *buf) { @@ -1251,11 +1252,11 @@ static size_t validate_text_search_token_set_array_common(bson_iter_t *iter_at_a return count; } -typedef enum { - TEXTSEARCH_SPEC_HAS_SUBSTRING = 1 << 0, - TEXTSEARCH_SPEC_HAS_SUFFIX = 1 << 1, - TEXTSEARCH_SPEC_HAS_PREFIX = 1 << 2, -} text_search_spec_query_type_flags; +typedef struct { + size_t substrings; + size_t suffixes; + size_t prefixes; +} text_search_expected_token_counts; // Assert that the fields in a insert/update payload V2 for text search match our expectations. // Specifically, checks that the length of these fields, and the values of deterministic fields, @@ -1266,7 +1267,7 @@ static void validate_text_search_ciphertext(_mongocrypt_tester_t *tester, const char *text_value, mongocrypt_fle2_placeholder_type_t type, uint64_t contention_max, - text_search_spec_query_type_flags flags) { + text_search_expected_token_counts expected_tag_counts) { bson_t iup_bson; bson_iter_t iter; ASSERT(_mongocrypt_buffer_to_bson(&ciphertext->data, &iup_bson)); @@ -1344,21 +1345,105 @@ static void validate_text_search_ciphertext(_mongocrypt_tester_t *tester, size_t tscount = 0; ASSERT(bson_iter_init_find(&b_iter, &b_bson, "s")); tscount = validate_text_search_token_set_array_common(&b_iter, crypt); - ASSERT((tscount > 0) == !!(flags & TEXTSEARCH_SPEC_HAS_SUBSTRING)); + ASSERT(expected_tag_counts.substrings == tscount); ASSERT(bson_iter_init_find(&b_iter, &b_bson, "u")); tscount = validate_text_search_token_set_array_common(&b_iter, crypt); - ASSERT((tscount > 0) == !!(flags & TEXTSEARCH_SPEC_HAS_SUFFIX)); + ASSERT(expected_tag_counts.suffixes == tscount); ASSERT(bson_iter_init_find(&b_iter, &b_bson, "p")); tscount = validate_text_search_token_set_array_common(&b_iter, crypt); - ASSERT((tscount > 0) == !!(flags & TEXTSEARCH_SPEC_HAS_PREFIX)); + ASSERT(expected_tag_counts.prefixes == tscount); } mc_ServerDataEncryptionLevel1Token_destroy(sdel1Token); bson_destroy(&iup_bson); } +static size_t calculate_expected_substring_tag_count(size_t beta, size_t mlen, size_t ub, size_t lb) { + ASSERT(beta <= SIZE_MAX - 15) + ASSERT(lb <= ub); + ASSERT(mlen >= ub); + size_t cbclen = 16 * ((beta + 15) / 16); + if (beta > mlen || lb > cbclen) { + return 0; + } + size_t maxkgram1 = 0; + size_t maxkgram2 = 0; + for (size_t j = lb; j <= ub; j++) { + maxkgram1 += (mlen - j + 1); + } + for (size_t j = lb; j <= BSON_MIN(ub, cbclen); j++) { + maxkgram2 += (cbclen - j + 1); + } + return BSON_MIN(maxkgram1, maxkgram2); // msize +} + +static size_t calculate_expected_nfix_tag_count(size_t beta, size_t ub, size_t lb) { + ASSERT(beta <= SIZE_MAX - 15) + ASSERT(lb <= ub); + size_t cbclen = 16 * ((beta + 15) / 16); + if (lb > cbclen) { + return 0; + } + return BSON_MIN(ub, cbclen) - lb + 1; +} + +// Runs _mongocrypt_marking_to_ciphertext to compute the ciphertext for the given marking. +static bool test_text_search_insert_marking_to_ciphertext(_mongocrypt_tester_t *tester, + mongocrypt_t *crypt, + _mongocrypt_ciphertext_t *out, + const char *test_string, + int test_string_len, + int mlen, + mongocrypt_status_t *status) { + ASSERT(mlen > 0); + mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt); + // Set up encryption environment + ASSERT_OK(mongocrypt_ctx_encrypt_init(ctx, "test", -1, TEST_FILE("./test/example/cmd.json")), ctx); + // Add a test key + _mongocrypt_buffer_t keyId; + _mongocrypt_buffer_from_binary(&keyId, TEST_BIN(16)); + keyId.subtype = BSON_SUBTYPE_UUID; + _mongocrypt_key_broker_add_test_key(&ctx->kb, &keyId); + + _mongocrypt_buffer_t marking_buf; + _mongocrypt_marking_t marking; + + bson_t *marking_bson = bson_new(); + BSON_APPEND_INT32(marking_bson, "t", 1); + BSON_APPEND_INT32(marking_bson, "a", 4); + BSON_APPEND_INT64(marking_bson, "cm", 2); + bson_t text_spec; + BSON_APPEND_DOCUMENT_BEGIN(marking_bson, "v", &text_spec); + bson_append_utf8(&text_spec, "v", 1, test_string, test_string_len); + BSON_APPEND_BOOL(&text_spec, "casef", false); + BSON_APPEND_BOOL(&text_spec, "diacf", false); + bson_t subspec; + BSON_APPEND_DOCUMENT_BEGIN(&text_spec, "substr", &subspec); + BSON_APPEND_INT32(&subspec, "mlen", mlen); + BSON_APPEND_INT32(&subspec, "ub", 1); + BSON_APPEND_INT32(&subspec, "lb", 1); + ASSERT(bson_append_document_end(&text_spec, &subspec)); + ASSERT(bson_append_document_end(marking_bson, &text_spec)); + + // Add key identifier info to the marking + BSON_APPEND_BINARY(marking_bson, "ki", BSON_SUBTYPE_UUID, (TEST_BIN(16))->data, 16); + BSON_APPEND_BINARY(marking_bson, "ku", BSON_SUBTYPE_UUID, (TEST_BIN(16))->data, 16); + _make_marking(marking_bson, &marking_buf); + // Use FLE2 as the subtype (default is FLE1) + marking_buf.data[0] = MC_SUBTYPE_FLE2EncryptionPlaceholder; + _parse_ok(&marking_buf, &marking); + + bool result = _mongocrypt_marking_to_ciphertext((void *)&ctx->kb, &marking, out, status); + + _mongocrypt_buffer_cleanup(&marking_buf); + bson_destroy(marking_bson); + _mongocrypt_marking_cleanup(&marking); + mongocrypt_ctx_destroy(ctx); + return result; +} + static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t *tester) { if (!_aes_ctr_is_supported_by_os) { TEST_PRINTF("Common Crypto with no CTR support detected. Skipping."); @@ -1382,6 +1467,8 @@ static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t _mongocrypt_ciphertext_t ciphertext; _mongocrypt_ciphertext_init(&ciphertext); mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT); + text_search_expected_token_counts counts = {0}; + counts.substrings = calculate_expected_substring_tag_count(6, 1000, 100, 10); get_ciphertext_from_marking_json(tester, crypt, markingJSON, &ciphertext); validate_text_search_ciphertext(tester, @@ -1390,7 +1477,7 @@ static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t "foobar", MONGOCRYPT_FLE2_PLACEHOLDER_TYPE_INSERT, 2, - TEXTSEARCH_SPEC_HAS_SUBSTRING); + counts); mongocrypt_destroy(crypt); _mongocrypt_ciphertext_cleanup(&ciphertext); @@ -1413,6 +1500,8 @@ static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t _mongocrypt_ciphertext_t ciphertext; _mongocrypt_ciphertext_init(&ciphertext); mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT); + text_search_expected_token_counts counts = {0}; + counts.suffixes = counts.prefixes = calculate_expected_nfix_tag_count(6, 100, 10); get_ciphertext_from_marking_json(tester, crypt, markingJSON, &ciphertext); validate_text_search_ciphertext(tester, @@ -1421,7 +1510,7 @@ static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t "foobar", MONGOCRYPT_FLE2_PLACEHOLDER_TYPE_INSERT, 2, - TEXTSEARCH_SPEC_HAS_SUFFIX | TEXTSEARCH_SPEC_HAS_PREFIX); + counts); mongocrypt_destroy(crypt); _mongocrypt_ciphertext_cleanup(&ciphertext); @@ -1443,6 +1532,8 @@ static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t _mongocrypt_ciphertext_t ciphertext; _mongocrypt_ciphertext_init(&ciphertext); mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT); + text_search_expected_token_counts counts = {0}; + counts.prefixes = calculate_expected_nfix_tag_count(1, 100, 10); get_ciphertext_from_marking_json(tester, crypt, markingJSON, &ciphertext); validate_text_search_ciphertext(tester, @@ -1451,8 +1542,98 @@ static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t "", MONGOCRYPT_FLE2_PLACEHOLDER_TYPE_INSERT, 2, - TEXTSEARCH_SPEC_HAS_PREFIX); + counts); + mongocrypt_destroy(crypt); + _mongocrypt_ciphertext_cleanup(&ciphertext); + } + + // Test string exceeds mlen + { + _mongocrypt_ciphertext_t ciphertext; + _mongocrypt_ciphertext_init(&ciphertext); + mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT); + mongocrypt_status_t *status = mongocrypt_status_new(); + + ASSERT_FAILS_STATUS( + test_text_search_insert_marking_to_ciphertext(tester, crypt, &ciphertext, "foobar", 6, 3, status), + status, + "longer than the maximum length for substring indexing"); + mongocrypt_status_destroy(status); + mongocrypt_destroy(crypt); + _mongocrypt_ciphertext_cleanup(&ciphertext); + } + + // Test string is not valid utf-8 + { + _mongocrypt_ciphertext_t ciphertext; + _mongocrypt_ciphertext_init(&ciphertext); + mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT); + const char *expected_msg = "String passed in was not valid UTF-8"; + mongocrypt_status_t *status = mongocrypt_status_new(); + + // invalid utf-8 byte 0xff + ASSERT_FAILS_STATUS(test_text_search_insert_marking_to_ciphertext(tester, + crypt, + &ciphertext, + "foob\xffr", + 6, + INT32_MAX, + status), + status, + expected_msg); + _mongocrypt_status_reset(status); + // embedded null byte + ASSERT_FAILS_STATUS(test_text_search_insert_marking_to_ciphertext(tester, + crypt, + &ciphertext, + "foob\x00r", + 6, + INT32_MAX, + status), + status, + expected_msg); + _mongocrypt_status_reset(status); + // overlong encoding of 'a' (\x61) + ASSERT_FAILS_STATUS(test_text_search_insert_marking_to_ciphertext(tester, + crypt, + &ciphertext, + "foob\xE0\x81\xA1r", + 8, + INT32_MAX, + status), + status, + expected_msg); + + mongocrypt_status_destroy(status); + mongocrypt_destroy(crypt); + _mongocrypt_ciphertext_cleanup(&ciphertext); + } + + // test string is too large + { + _mongocrypt_ciphertext_t ciphertext; + _mongocrypt_ciphertext_init(&ciphertext); + mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT); + const char *expected_msg = "String passed in was too long"; + mongocrypt_status_t *status = mongocrypt_status_new(); + + int len = (16 * 1024 * 1024) + 2; + char *large_str = bson_malloc(len); + memset(large_str, 'a', len); + large_str[len - 1] = '\0'; + + ASSERT_FAILS_STATUS(test_text_search_insert_marking_to_ciphertext(tester, + crypt, + &ciphertext, + large_str, + len - 1, + INT32_MAX, + status), + status, + expected_msg); + bson_free(large_str); + mongocrypt_status_destroy(status); mongocrypt_destroy(crypt); _mongocrypt_ciphertext_cleanup(&ciphertext); } From 145096972e70b9cafbab7e15e103393da030b9e6 Mon Sep 17 00:00:00 2001 From: Erwin Pe Date: Wed, 29 Jan 2025 17:51:44 +0000 Subject: [PATCH 2/8] assert safe cast of uint32_t length to int --- src/mongocrypt-marking.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/mongocrypt-marking.c b/src/mongocrypt-marking.c index 76b92ea35..f2dc6bf42 100644 --- a/src/mongocrypt-marking.c +++ b/src/mongocrypt-marking.c @@ -1166,9 +1166,10 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, { _mongocrypt_buffer_t asBsonValue; _mongocrypt_buffer_init(&asBsonValue); + BSON_ASSERT(encodeSets->exact.len < INT_MAX); _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, (const char *)encodeSets->exact.data, - encodeSets->exact.len); + (int)encodeSets->exact.len); if (!_fle2_generate_TextExactTokenSet(kb, &tsts->exact, &asBsonValue, @@ -1197,7 +1198,8 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, _mongocrypt_buffer_t asBsonValue; _mongocrypt_buffer_init(&asBsonValue); - _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, bytelen); + BSON_ASSERT(bytelen < INT_MAX); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); if (!_fle2_generate_TextSubstringTokenSet(kb, &tset, @@ -1228,7 +1230,8 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, _mongocrypt_buffer_t asBsonValue; _mongocrypt_buffer_init(&asBsonValue); - _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, bytelen); + BSON_ASSERT(bytelen < INT_MAX); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); if (!_fle2_generate_TextSuffixTokenSet(kb, &tset, @@ -1259,7 +1262,8 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, _mongocrypt_buffer_t asBsonValue; _mongocrypt_buffer_init(&asBsonValue); - _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, bytelen); + BSON_ASSERT(bytelen < INT_MAX); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); if (!_fle2_generate_TextPrefixTokenSet(kb, &tset, From 22a1e32015fbf45c807d325eaf6f586f17e3c5e5 Mon Sep 17 00:00:00 2001 From: Erwin Pe Date: Wed, 29 Jan 2025 18:22:58 +0000 Subject: [PATCH 3/8] Fix memleak --- test/test-mongocrypt-buffer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test-mongocrypt-buffer.c b/test/test-mongocrypt-buffer.c index c931eb1ce..676709f27 100644 --- a/test/test-mongocrypt-buffer.c +++ b/test/test-mongocrypt-buffer.c @@ -252,6 +252,7 @@ static void _test_mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_t strlen(data) + 1, buf.data + expectedLenBuf.len, buf.len - expectedLenBuf.len); + _mongocrypt_buffer_cleanup(&buf); _mongocrypt_buffer_cleanup(&expectedLenBuf); } From 867d146512f811163bd93755a8a7154ec8c704e2 Mon Sep 17 00:00:00 2001 From: Erwin Pe Date: Wed, 29 Jan 2025 20:19:40 +0000 Subject: [PATCH 4/8] Fix conversion errors --- test/test-mongocrypt-buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test-mongocrypt-buffer.c b/test/test-mongocrypt-buffer.c index 676709f27..1945be85d 100644 --- a/test/test-mongocrypt-buffer.c +++ b/test/test-mongocrypt-buffer.c @@ -239,10 +239,10 @@ static void _test_mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_t const char *data = "foobar"; // expect output to contain 4-byte length + data + null string terminator - uint32_t expectedLen = sizeof(int32_t) + strlen(data) + sizeof(uint8_t); + size_t expectedLen = sizeof(int32_t) + strlen(data) + sizeof(uint8_t); _mongocrypt_buffer_copy_from_hex(&expectedLenBuf, "07000000"); - _mongocrypt_buffer_copy_from_string_as_bson_value(&buf, data, strlen(data)); + _mongocrypt_buffer_copy_from_string_as_bson_value(&buf, data, (int)strlen(data)); ASSERT(buf.len == expectedLen); // check 4-byte length From dacfaa15d5e0513c5daaf8c7abfb20f061c59fef Mon Sep 17 00:00:00 2001 From: Erwin Pe Date: Wed, 29 Jan 2025 22:16:00 +0000 Subject: [PATCH 5/8] Gabe's suggestions --- ...mc-fle2-insert-update-payload-private-v2.h | 3 +- src/mc-fle2-insert-update-payload-v2.c | 8 + src/mongocrypt-buffer.c | 8 +- src/mongocrypt-marking.c | 147 ++++++++++-------- test/test-mongocrypt-marking.c | 6 +- 5 files changed, 101 insertions(+), 71 deletions(-) diff --git a/src/mc-fle2-insert-update-payload-private-v2.h b/src/mc-fle2-insert-update-payload-private-v2.h index 138b10e86..7246ff0f7 100644 --- a/src/mc-fle2-insert-update-payload-private-v2.h +++ b/src/mc-fle2-insert-update-payload-private-v2.h @@ -33,7 +33,8 @@ _mongocrypt_buffer_t encryptedTokens; \ } mc_Text##Type##TokenSet_t; \ void mc_Text##Type##TokenSet_init(mc_Text##Type##TokenSet_t *); \ - void mc_Text##Type##TokenSet_cleanup(mc_Text##Type##TokenSet_t *) + void mc_Text##Type##TokenSet_cleanup(mc_Text##Type##TokenSet_t *); \ + void mc_Text##Type##TokenSet_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dest) DEF_TEXT_SEARCH_TOKEN_SET(Exact); DEF_TEXT_SEARCH_TOKEN_SET(Substring); diff --git a/src/mc-fle2-insert-update-payload-v2.c b/src/mc-fle2-insert-update-payload-v2.c index f379f3952..5a161d667 100644 --- a/src/mc-fle2-insert-update-payload-v2.c +++ b/src/mc-fle2-insert-update-payload-v2.c @@ -32,6 +32,14 @@ _mongocrypt_buffer_cleanup(&ts->escDerivedToken); \ _mongocrypt_buffer_cleanup(&ts->serverDerivedFromDataToken); \ _mongocrypt_buffer_cleanup(&ts->encryptedTokens); \ + } \ + void mc_Text##Type##TokenSet_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dst) { \ + BSON_ASSERT_PARAM(src); \ + BSON_ASSERT_PARAM(dst); \ + _mongocrypt_buffer_copy_to(&src->edcDerivedToken, &dst->edcDerivedToken); \ + _mongocrypt_buffer_copy_to(&src->escDerivedToken, &dst->escDerivedToken); \ + _mongocrypt_buffer_copy_to(&src->serverDerivedFromDataToken, &dst->serverDerivedFromDataToken); \ + _mongocrypt_buffer_copy_to(&src->encryptedTokens, &dst->encryptedTokens); \ } DEF_TEXT_SEARCH_TOKEN_SET_INIT_CLEANUP(Exact) diff --git a/src/mongocrypt-buffer.c b/src/mongocrypt-buffer.c index 846962252..257c6a351 100644 --- a/src/mongocrypt-buffer.c +++ b/src/mongocrypt-buffer.c @@ -349,23 +349,23 @@ static void _mongocrypt_buffer_copy_as_bson_value(_mongocrypt_buffer_t *plaintex bson_destroy(&wrapper); } -bool append_iter(bson_t *bson, const void *iter, int len) { +static bool _append_iter(bson_t *bson, const void *iter, int len) { return bson_append_iter(bson, "", 0, (const bson_iter_t *)iter); } -bool append_utf8(bson_t *bson, const void *str, int len) { +static bool _append_utf8(bson_t *bson, const void *str, int len) { return bson_append_utf8(bson, "", 0, (const char *)str, len); } void _mongocrypt_buffer_copy_from_string_as_bson_value(_mongocrypt_buffer_t *plaintext, const char *str, int len) { BSON_ASSERT_PARAM(str); BSON_ASSERT(len >= 0); - _mongocrypt_buffer_copy_as_bson_value(plaintext, append_utf8, str, len); + _mongocrypt_buffer_copy_as_bson_value(plaintext, _append_utf8, str, len); } void _mongocrypt_buffer_from_iter(_mongocrypt_buffer_t *plaintext, bson_iter_t *iter) { BSON_ASSERT_PARAM(iter); - _mongocrypt_buffer_copy_as_bson_value(plaintext, append_iter, iter, 0); + _mongocrypt_buffer_copy_as_bson_value(plaintext, _append_iter, iter, 0); } bool _mongocrypt_buffer_from_uuid_iter(_mongocrypt_buffer_t *buf, bson_iter_t *iter) { diff --git a/src/mongocrypt-marking.c b/src/mongocrypt-marking.c index f2dc6bf42..d53cdbb56 100644 --- a/src/mongocrypt-marking.c +++ b/src/mongocrypt-marking.c @@ -1192,29 +1192,36 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, mc_substring_set_iter_init(&set_itr, encodeSets->substring_set); while (mc_substring_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) { - for (; appendCount > 0; appendCount--) { - mc_TextSubstringTokenSet_t tset = {{0}}; - mc_TextSubstringTokenSet_init(&tset); - - _mongocrypt_buffer_t asBsonValue; - _mongocrypt_buffer_init(&asBsonValue); - BSON_ASSERT(bytelen < INT_MAX); - _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); - - if (!_fle2_generate_TextSubstringTokenSet(kb, - &tset, - &asBsonValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { - _mongocrypt_buffer_cleanup(&asBsonValue); - mc_TextSubstringTokenSet_cleanup(&tset); - goto fail; - } - _mc_array_append_val(&tsts->substringArray, tset); // moves ownership of tset + BSON_ASSERT(appendCount > 0); + BSON_ASSERT(bytelen < INT_MAX); + + mc_TextSubstringTokenSet_t tset = {{0}}; + mc_TextSubstringTokenSet_init(&tset); + + _mongocrypt_buffer_t asBsonValue; + _mongocrypt_buffer_init(&asBsonValue); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); + + if (!_fle2_generate_TextSubstringTokenSet(kb, + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { _mongocrypt_buffer_cleanup(&asBsonValue); + mc_TextSubstringTokenSet_cleanup(&tset); + goto fail; + } + _mongocrypt_buffer_cleanup(&asBsonValue); + + for (; appendCount > 1; appendCount--) { + mc_TextSubstringTokenSet_t tset_copy; + mc_TextSubstringTokenSet_init(&tset_copy); + mc_TextSubstringTokenSet_copy(&tset, &tset_copy); + _mc_array_append_val(&tsts->substringArray, tset_copy); // array now owns tset_copy } + _mc_array_append_val(&tsts->substringArray, tset); // array now owns tset } } @@ -1224,29 +1231,36 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, mc_affix_set_iter_init(&set_itr, encodeSets->suffix_set); while (mc_affix_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) { - for (; appendCount > 0; appendCount--) { - mc_TextSuffixTokenSet_t tset = {{0}}; - mc_TextSuffixTokenSet_init(&tset); - - _mongocrypt_buffer_t asBsonValue; - _mongocrypt_buffer_init(&asBsonValue); - BSON_ASSERT(bytelen < INT_MAX); - _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); - - if (!_fle2_generate_TextSuffixTokenSet(kb, - &tset, - &asBsonValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { - _mongocrypt_buffer_cleanup(&asBsonValue); - mc_TextSuffixTokenSet_cleanup(&tset); - goto fail; - } - _mc_array_append_val(&tsts->suffixArray, tset); // moves ownership of tset + BSON_ASSERT(appendCount > 0); + BSON_ASSERT(bytelen < INT_MAX); + + mc_TextSuffixTokenSet_t tset = {{0}}; + mc_TextSuffixTokenSet_init(&tset); + + _mongocrypt_buffer_t asBsonValue; + _mongocrypt_buffer_init(&asBsonValue); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); + + if (!_fle2_generate_TextSuffixTokenSet(kb, + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { _mongocrypt_buffer_cleanup(&asBsonValue); + mc_TextSuffixTokenSet_cleanup(&tset); + goto fail; } + _mongocrypt_buffer_cleanup(&asBsonValue); + + for (; appendCount > 1; appendCount--) { + mc_TextSuffixTokenSet_t tset_copy; + mc_TextSuffixTokenSet_init(&tset_copy); + mc_TextSuffixTokenSet_copy(&tset, &tset_copy); + _mc_array_append_val(&tsts->suffixArray, tset_copy); // array now owns tset_copy + } + _mc_array_append_val(&tsts->suffixArray, tset); // array now owns tset } } @@ -1256,29 +1270,36 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, mc_affix_set_iter_init(&set_itr, encodeSets->prefix_set); while (mc_affix_set_iter_next(&set_itr, &substring, &bytelen, &appendCount)) { - for (; appendCount > 0; appendCount--) { - mc_TextPrefixTokenSet_t tset = {{0}}; - mc_TextPrefixTokenSet_init(&tset); - - _mongocrypt_buffer_t asBsonValue; - _mongocrypt_buffer_init(&asBsonValue); - BSON_ASSERT(bytelen < INT_MAX); - _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); - - if (!_fle2_generate_TextPrefixTokenSet(kb, - &tset, - &asBsonValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { - _mongocrypt_buffer_cleanup(&asBsonValue); - mc_TextPrefixTokenSet_cleanup(&tset); - goto fail; - } - _mc_array_append_val(&tsts->prefixArray, tset); // moves ownership of tset + BSON_ASSERT(appendCount > 0); + BSON_ASSERT(bytelen < INT_MAX); + + mc_TextPrefixTokenSet_t tset = {{0}}; + mc_TextPrefixTokenSet_init(&tset); + + _mongocrypt_buffer_t asBsonValue; + _mongocrypt_buffer_init(&asBsonValue); + _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); + + if (!_fle2_generate_TextPrefixTokenSet(kb, + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { _mongocrypt_buffer_cleanup(&asBsonValue); + mc_TextPrefixTokenSet_cleanup(&tset); + goto fail; + } + _mongocrypt_buffer_cleanup(&asBsonValue); + + for (; appendCount > 1; appendCount--) { + mc_TextPrefixTokenSet_t tset_copy; + mc_TextPrefixTokenSet_init(&tset_copy); + mc_TextPrefixTokenSet_copy(&tset, &tset_copy); + _mc_array_append_val(&tsts->prefixArray, tset_copy); // array now owns tset_copy } + _mc_array_append_val(&tsts->prefixArray, tset); // moves ownership of tset } } payload->textSearchTokenSets.set = true; diff --git a/test/test-mongocrypt-marking.c b/test/test-mongocrypt-marking.c index 19e305fed..644926c03 100644 --- a/test/test-mongocrypt-marking.c +++ b/test/test-mongocrypt-marking.c @@ -1345,15 +1345,15 @@ static void validate_text_search_ciphertext(_mongocrypt_tester_t *tester, size_t tscount = 0; ASSERT(bson_iter_init_find(&b_iter, &b_bson, "s")); tscount = validate_text_search_token_set_array_common(&b_iter, crypt); - ASSERT(expected_tag_counts.substrings == tscount); + ASSERT_CMPSIZE_T(expected_tag_counts.substrings, ==, tscount); ASSERT(bson_iter_init_find(&b_iter, &b_bson, "u")); tscount = validate_text_search_token_set_array_common(&b_iter, crypt); - ASSERT(expected_tag_counts.suffixes == tscount); + ASSERT_CMPSIZE_T(expected_tag_counts.suffixes, ==, tscount); ASSERT(bson_iter_init_find(&b_iter, &b_bson, "p")); tscount = validate_text_search_token_set_array_common(&b_iter, crypt); - ASSERT(expected_tag_counts.prefixes == tscount); + ASSERT_CMPSIZE_T(expected_tag_counts.prefixes, ==, tscount); } mc_ServerDataEncryptionLevel1Token_destroy(sdel1Token); From 348ab30e1d3c5834145f5cc18aa0a4d28c2dc3a1 Mon Sep 17 00:00:00 2001 From: Erwin Pe Date: Wed, 29 Jan 2025 22:28:27 +0000 Subject: [PATCH 6/8] Gabe's feedback 2; Fix empty sets bug and add test --- ...mc-fle2-insert-update-payload-private-v2.h | 2 +- src/mc-fle2-insert-update-payload-v2.c | 10 +-- src/mongocrypt-marking.c | 58 +++++++------ test/test-mongocrypt-marking.c | 81 ++++++++++++++----- 4 files changed, 92 insertions(+), 59 deletions(-) diff --git a/src/mc-fle2-insert-update-payload-private-v2.h b/src/mc-fle2-insert-update-payload-private-v2.h index 7246ff0f7..fe1c7476a 100644 --- a/src/mc-fle2-insert-update-payload-private-v2.h +++ b/src/mc-fle2-insert-update-payload-private-v2.h @@ -34,7 +34,7 @@ } mc_Text##Type##TokenSet_t; \ void mc_Text##Type##TokenSet_init(mc_Text##Type##TokenSet_t *); \ void mc_Text##Type##TokenSet_cleanup(mc_Text##Type##TokenSet_t *); \ - void mc_Text##Type##TokenSet_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dest) + void mc_Text##Type##TokenSet_shallow_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dest) DEF_TEXT_SEARCH_TOKEN_SET(Exact); DEF_TEXT_SEARCH_TOKEN_SET(Substring); diff --git a/src/mc-fle2-insert-update-payload-v2.c b/src/mc-fle2-insert-update-payload-v2.c index 5a161d667..9f181673b 100644 --- a/src/mc-fle2-insert-update-payload-v2.c +++ b/src/mc-fle2-insert-update-payload-v2.c @@ -33,13 +33,13 @@ _mongocrypt_buffer_cleanup(&ts->serverDerivedFromDataToken); \ _mongocrypt_buffer_cleanup(&ts->encryptedTokens); \ } \ - void mc_Text##Type##TokenSet_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dst) { \ + void mc_Text##Type##TokenSet_shallow_copy(const mc_Text##Type##TokenSet_t *src, mc_Text##Type##TokenSet_t *dst) { \ BSON_ASSERT_PARAM(src); \ BSON_ASSERT_PARAM(dst); \ - _mongocrypt_buffer_copy_to(&src->edcDerivedToken, &dst->edcDerivedToken); \ - _mongocrypt_buffer_copy_to(&src->escDerivedToken, &dst->escDerivedToken); \ - _mongocrypt_buffer_copy_to(&src->serverDerivedFromDataToken, &dst->serverDerivedFromDataToken); \ - _mongocrypt_buffer_copy_to(&src->encryptedTokens, &dst->encryptedTokens); \ + _mongocrypt_buffer_set_to(&src->edcDerivedToken, &dst->edcDerivedToken); \ + _mongocrypt_buffer_set_to(&src->escDerivedToken, &dst->escDerivedToken); \ + _mongocrypt_buffer_set_to(&src->serverDerivedFromDataToken, &dst->serverDerivedFromDataToken); \ + _mongocrypt_buffer_set_to(&src->encryptedTokens, &dst->encryptedTokens); \ } DEF_TEXT_SEARCH_TOKEN_SET_INIT_CLEANUP(Exact) diff --git a/src/mongocrypt-marking.c b/src/mongocrypt-marking.c index d53cdbb56..41a65c76a 100644 --- a/src/mongocrypt-marking.c +++ b/src/mongocrypt-marking.c @@ -1187,7 +1187,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, uint32_t appendCount; // Generate array of substring token sets - if (spec->substr.set) { + if (encodeSets->substring_set) { mc_substring_set_iter_t set_itr; mc_substring_set_iter_init(&set_itr, encodeSets->substring_set); @@ -1195,20 +1195,19 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, BSON_ASSERT(appendCount > 0); BSON_ASSERT(bytelen < INT_MAX); - mc_TextSubstringTokenSet_t tset = {{0}}; - mc_TextSubstringTokenSet_init(&tset); + mc_TextSubstringTokenSet_t tset = {0}; _mongocrypt_buffer_t asBsonValue; _mongocrypt_buffer_init(&asBsonValue); _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); if (!_fle2_generate_TextSubstringTokenSet(kb, - &tset, - &asBsonValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { _mongocrypt_buffer_cleanup(&asBsonValue); mc_TextSubstringTokenSet_cleanup(&tset); goto fail; @@ -1217,8 +1216,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, for (; appendCount > 1; appendCount--) { mc_TextSubstringTokenSet_t tset_copy; - mc_TextSubstringTokenSet_init(&tset_copy); - mc_TextSubstringTokenSet_copy(&tset, &tset_copy); + mc_TextSubstringTokenSet_shallow_copy(&tset, &tset_copy); _mc_array_append_val(&tsts->substringArray, tset_copy); // array now owns tset_copy } _mc_array_append_val(&tsts->substringArray, tset); // array now owns tset @@ -1226,7 +1224,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, } // Generate array of suffix token sets - if (spec->suffix.set) { + if (encodeSets->suffix_set) { mc_affix_set_iter_t set_itr; mc_affix_set_iter_init(&set_itr, encodeSets->suffix_set); @@ -1234,7 +1232,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, BSON_ASSERT(appendCount > 0); BSON_ASSERT(bytelen < INT_MAX); - mc_TextSuffixTokenSet_t tset = {{0}}; + mc_TextSuffixTokenSet_t tset = {0}; mc_TextSuffixTokenSet_init(&tset); _mongocrypt_buffer_t asBsonValue; @@ -1242,12 +1240,12 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); if (!_fle2_generate_TextSuffixTokenSet(kb, - &tset, - &asBsonValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { _mongocrypt_buffer_cleanup(&asBsonValue); mc_TextSuffixTokenSet_cleanup(&tset); goto fail; @@ -1256,8 +1254,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, for (; appendCount > 1; appendCount--) { mc_TextSuffixTokenSet_t tset_copy; - mc_TextSuffixTokenSet_init(&tset_copy); - mc_TextSuffixTokenSet_copy(&tset, &tset_copy); + mc_TextSuffixTokenSet_shallow_copy(&tset, &tset_copy); _mc_array_append_val(&tsts->suffixArray, tset_copy); // array now owns tset_copy } _mc_array_append_val(&tsts->suffixArray, tset); // array now owns tset @@ -1265,7 +1262,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, } // Generate array of prefix token sets - if (spec->prefix.set) { + if (encodeSets->prefix_set) { mc_affix_set_iter_t set_itr; mc_affix_set_iter_init(&set_itr, encodeSets->prefix_set); @@ -1273,7 +1270,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, BSON_ASSERT(appendCount > 0); BSON_ASSERT(bytelen < INT_MAX); - mc_TextPrefixTokenSet_t tset = {{0}}; + mc_TextPrefixTokenSet_t tset = {0}; mc_TextPrefixTokenSet_init(&tset); _mongocrypt_buffer_t asBsonValue; @@ -1281,12 +1278,12 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, _mongocrypt_buffer_copy_from_string_as_bson_value(&asBsonValue, substring, (int)bytelen); if (!_fle2_generate_TextPrefixTokenSet(kb, - &tset, - &asBsonValue, - contentionFactor, - common.collectionsLevel1Token, - common.serverTokenDerivationLevel1Token, - status)) { + &tset, + &asBsonValue, + contentionFactor, + common.collectionsLevel1Token, + common.serverTokenDerivationLevel1Token, + status)) { _mongocrypt_buffer_cleanup(&asBsonValue); mc_TextPrefixTokenSet_cleanup(&tset); goto fail; @@ -1295,8 +1292,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, for (; appendCount > 1; appendCount--) { mc_TextPrefixTokenSet_t tset_copy; - mc_TextPrefixTokenSet_init(&tset_copy); - mc_TextPrefixTokenSet_copy(&tset, &tset_copy); + mc_TextPrefixTokenSet_shallow_copy(&tset, &tset_copy); _mc_array_append_val(&tsts->prefixArray, tset_copy); // array now owns tset_copy } _mc_array_append_val(&tsts->prefixArray, tset); // moves ownership of tset diff --git a/test/test-mongocrypt-marking.c b/test/test-mongocrypt-marking.c index 644926c03..eb23f0b56 100644 --- a/test/test-mongocrypt-marking.c +++ b/test/test-mongocrypt-marking.c @@ -1216,16 +1216,16 @@ static void validate_text_search_token_set_common(bson_iter_t *iter_at_token_set mongocrypt_binary_t encrypted_token_bin; validate_and_get_bindata(&ts_bson, "d", BSON_SUBTYPE_BINARY, &token_bin); - ASSERT(token_bin.len == MONGOCRYPT_HMAC_SHA256_LEN); + ASSERT_CMPUINT32(token_bin.len, ==, MONGOCRYPT_HMAC_SHA256_LEN); validate_and_get_bindata(&ts_bson, "l", BSON_SUBTYPE_BINARY, &token_bin); - ASSERT(token_bin.len == MONGOCRYPT_HMAC_SHA256_LEN); + ASSERT_CMPUINT32(token_bin.len, ==, MONGOCRYPT_HMAC_SHA256_LEN); validate_and_get_bindata(&ts_bson, "s", BSON_SUBTYPE_BINARY, &esc_token_bin); - ASSERT(esc_token_bin.len == MONGOCRYPT_HMAC_SHA256_LEN); + ASSERT_CMPUINT32(esc_token_bin.len, ==, MONGOCRYPT_HMAC_SHA256_LEN); validate_and_get_bindata(&ts_bson, "p", BSON_SUBTYPE_BINARY, &encrypted_token_bin); - ASSERT(encrypted_token_bin.len == (16 + MONGOCRYPT_HMAC_SHA256_LEN)); + ASSERT_CMPUINT32(encrypted_token_bin.len, ==, (16 + MONGOCRYPT_HMAC_SHA256_LEN)); // validate crypto of p validate_encrypted_token(crypt, &encrypted_token_bin, &esc_token_bin, false, NULL); @@ -1273,21 +1273,22 @@ static void validate_text_search_ciphertext(_mongocrypt_tester_t *tester, ASSERT(_mongocrypt_buffer_to_bson(&ciphertext->data, &iup_bson)); mc_ServerDataEncryptionLevel1Token_t *sdel1Token = getSDEL1Token(crypt); + const mongocrypt_binary_t *keyId = TEST_BIN(16); // don't free! if (type == MONGOCRYPT_FLE2_PLACEHOLDER_TYPE_INSERT) { - ASSERT(ciphertext->blob_subtype == MC_SUBTYPE_FLE2InsertUpdatePayloadV2); - ASSERT(ciphertext->original_bson_type == 0); // unset - ASSERT(ciphertext->key_id.len == 0); // unset + ASSERT_CMPUINT8(ciphertext->blob_subtype, ==, MC_SUBTYPE_FLE2InsertUpdatePayloadV2); + ASSERT_CMPUINT8(ciphertext->original_bson_type, ==, 0); // unset + ASSERT_CMPUINT32(ciphertext->key_id.len, ==, 0); // unset iupv2_fields_common res = validate_iupv2_common(&iup_bson); // validate u, t, k have correct values - ASSERT(memcmp(res.u.data, (TEST_BIN(16))->data, 16) == 0); - ASSERT(res.t == BSON_TYPE_UTF8); - ASSERT(res.k <= contention_max); + ASSERT_CMPBYTES(keyId->data, keyId->len, res.u.data, res.u.len); + ASSERT_CMPUINT32(res.t, ==, BSON_TYPE_UTF8); + ASSERT_CMPUINT64(res.k, <=, contention_max); // validate e is ServerDataEncryptionLevel1Token = HMAC(RootKey, 3) - ASSERT(res.e.len == mc_ServerDataEncryptionLevel1Token_get(sdel1Token)->len); + ASSERT_CMPUINT32(res.e.len, ==, mc_ServerDataEncryptionLevel1Token_get(sdel1Token)->len); ASSERT(memcmp(res.e.data, mc_ServerDataEncryptionLevel1Token_get(sdel1Token)->data, res.e.len) == 0); // validate crypto of p @@ -1300,8 +1301,8 @@ static void validate_text_search_ciphertext(_mongocrypt_tester_t *tester, const _mongocrypt_value_encryption_algorithm_t *fle2alg = _mcFLE2v2AEADAlgorithm(); // assert first 16 bytes == userKeyId == indexKeyId - ASSERT(res.v.len > 16); - ASSERT(memcmp(res.v.data, res.u.data, res.u.len) == 0); + ASSERT_CMPUINT32(res.v.len, >, 16); + ASSERT_CMPBYTES(keyId->data, keyId->len, res.v.data, 16); _mongocrypt_buffer_t key, aad, ctext, ptext; _mongocrypt_buffer_init_size(&key, MONGOCRYPT_KEY_LEN); @@ -1315,9 +1316,9 @@ static void validate_text_search_ciphertext(_mongocrypt_tester_t *tester, ASSERT_OK_STATUS(fle2alg->do_decrypt(crypt->crypto, &aad, &key, &ctext, &ptext, &pbytes, status), status); // BSON strings have 5 (4 for size + 1 null terminator) bytes of overhead - ASSERT(pbytes >= 5); - ASSERT(strlen(text_value) == pbytes - 5); - ASSERT(0 == strcmp(text_value, (char *)(ptext.data + 4))); + ASSERT_CMPUINT32(pbytes, >=, 5); + ASSERT_CMPSIZE_T(strlen(text_value), ==, (pbytes - 5)); + ASSERT_STREQUAL(text_value, ((char *)(ptext.data + 4))); _mongocrypt_buffer_cleanup(&ptext); _mongocrypt_buffer_cleanup(&ctext); @@ -1361,9 +1362,10 @@ static void validate_text_search_ciphertext(_mongocrypt_tester_t *tester, } static size_t calculate_expected_substring_tag_count(size_t beta, size_t mlen, size_t ub, size_t lb) { - ASSERT(beta <= SIZE_MAX - 15) - ASSERT(lb <= ub); - ASSERT(mlen >= ub); + ASSERT_CMPSIZE_T(beta, <=, (SIZE_MAX - 15)); + ASSERT_CMPSIZE_T(lb, <=, ub); + ASSERT_CMPSIZE_T(mlen, >=, ub); + size_t cbclen = 16 * ((beta + 15) / 16); if (beta > mlen || lb > cbclen) { return 0; @@ -1380,8 +1382,8 @@ static size_t calculate_expected_substring_tag_count(size_t beta, size_t mlen, s } static size_t calculate_expected_nfix_tag_count(size_t beta, size_t ub, size_t lb) { - ASSERT(beta <= SIZE_MAX - 15) - ASSERT(lb <= ub); + ASSERT_CMPSIZE_T(beta, <=, (SIZE_MAX - 15)); + ASSERT_CMPSIZE_T(lb, <=, ub); size_t cbclen = 16 * ((beta + 15) / 16); if (lb > cbclen) { return 0; @@ -1397,7 +1399,8 @@ static bool test_text_search_insert_marking_to_ciphertext(_mongocrypt_tester_t * int test_string_len, int mlen, mongocrypt_status_t *status) { - ASSERT(mlen > 0); + ASSERT_CMPINT(mlen, >, 0); + mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt); // Set up encryption environment ASSERT_OK(mongocrypt_ctx_encrypt_init(ctx, "test", -1, TEST_FILE("./test/example/cmd.json")), ctx); @@ -1547,6 +1550,40 @@ static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t _mongocrypt_ciphertext_cleanup(&ciphertext); } + // Test string cbc-padded length is less than lb (ie. substring/suffix/prefix tag sets will be + // empty) + { + const char *markingJSON = RAW_STRING({ + 't' : 1, + 'a' : 4, + 'v' : { + 'v' : "foobar", + 'casef' : false, + 'diacf' : false, + 'substr' : + {'mlen' : {'$numberInt' : '1000'}, 'ub' : {'$numberInt' : '100'}, 'lb' : {'$numberInt' : '20'}}, + 'prefix' : {'ub' : {'$numberInt' : '100'}, 'lb' : {'$numberInt' : '20'}}, + 'suffix' : {'ub' : {'$numberInt' : '100'}, 'lb' : {'$numberInt' : '20'}} + }, + 'cm' : {'$numberLong' : '2'} + }); + _mongocrypt_ciphertext_t ciphertext; + _mongocrypt_ciphertext_init(&ciphertext); + mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT); + text_search_expected_token_counts counts = {0}; + + get_ciphertext_from_marking_json(tester, crypt, markingJSON, &ciphertext); + validate_text_search_ciphertext(tester, + &ciphertext, + crypt, + "foobar", + MONGOCRYPT_FLE2_PLACEHOLDER_TYPE_INSERT, + 2, + counts); + mongocrypt_destroy(crypt); + _mongocrypt_ciphertext_cleanup(&ciphertext); + } + // Test string exceeds mlen { _mongocrypt_ciphertext_t ciphertext; From 3f6df0d68fb2996fb219e8de33508859d92f1ceb Mon Sep 17 00:00:00 2001 From: Erwin Pe Date: Fri, 31 Jan 2025 16:15:10 +0000 Subject: [PATCH 7/8] revert back to double braces --- src/mongocrypt-marking.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mongocrypt-marking.c b/src/mongocrypt-marking.c index 41a65c76a..f5e7ed555 100644 --- a/src/mongocrypt-marking.c +++ b/src/mongocrypt-marking.c @@ -1195,7 +1195,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, BSON_ASSERT(appendCount > 0); BSON_ASSERT(bytelen < INT_MAX); - mc_TextSubstringTokenSet_t tset = {0}; + mc_TextSubstringTokenSet_t tset = {{0}}; _mongocrypt_buffer_t asBsonValue; _mongocrypt_buffer_init(&asBsonValue); @@ -1232,7 +1232,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, BSON_ASSERT(appendCount > 0); BSON_ASSERT(bytelen < INT_MAX); - mc_TextSuffixTokenSet_t tset = {0}; + mc_TextSuffixTokenSet_t tset = {{0}}; mc_TextSuffixTokenSet_init(&tset); _mongocrypt_buffer_t asBsonValue; @@ -1270,7 +1270,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, BSON_ASSERT(appendCount > 0); BSON_ASSERT(bytelen < INT_MAX); - mc_TextPrefixTokenSet_t tset = {0}; + mc_TextPrefixTokenSet_t tset = {{0}}; mc_TextPrefixTokenSet_init(&tset); _mongocrypt_buffer_t asBsonValue; From 1d95bf4b08baef13c9b06c019ef26b6f21fbd33d Mon Sep 17 00:00:00 2001 From: Erwin Pe Date: Fri, 31 Jan 2025 20:39:20 +0000 Subject: [PATCH 8/8] fixes --- src/mongocrypt-marking.c | 19 +++++++++++++------ test/test-mongocrypt-marking.c | 2 ++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/mongocrypt-marking.c b/src/mongocrypt-marking.c index f5e7ed555..50025638f 100644 --- a/src/mongocrypt-marking.c +++ b/src/mongocrypt-marking.c @@ -1177,6 +1177,7 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, common.collectionsLevel1Token, common.serverTokenDerivationLevel1Token, status)) { + _mongocrypt_buffer_cleanup(&asBsonValue); goto fail; } _mongocrypt_buffer_cleanup(&asBsonValue); @@ -1214,10 +1215,12 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, } _mongocrypt_buffer_cleanup(&asBsonValue); - for (; appendCount > 1; appendCount--) { + if (appendCount > 1) { mc_TextSubstringTokenSet_t tset_copy; mc_TextSubstringTokenSet_shallow_copy(&tset, &tset_copy); - _mc_array_append_val(&tsts->substringArray, tset_copy); // array now owns tset_copy + for (; appendCount > 1; appendCount--) { + _mc_array_append_val(&tsts->substringArray, tset_copy); + } } _mc_array_append_val(&tsts->substringArray, tset); // array now owns tset } @@ -1252,10 +1255,12 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, } _mongocrypt_buffer_cleanup(&asBsonValue); - for (; appendCount > 1; appendCount--) { + if (appendCount > 1) { mc_TextSuffixTokenSet_t tset_copy; mc_TextSuffixTokenSet_shallow_copy(&tset, &tset_copy); - _mc_array_append_val(&tsts->suffixArray, tset_copy); // array now owns tset_copy + for (; appendCount > 1; appendCount--) { + _mc_array_append_val(&tsts->suffixArray, tset_copy); + } } _mc_array_append_val(&tsts->suffixArray, tset); // array now owns tset } @@ -1290,10 +1295,12 @@ static bool _fle2_generate_TextSearchTokenSets(_mongocrypt_key_broker_t *kb, } _mongocrypt_buffer_cleanup(&asBsonValue); - for (; appendCount > 1; appendCount--) { + if (appendCount > 1) { mc_TextPrefixTokenSet_t tset_copy; mc_TextPrefixTokenSet_shallow_copy(&tset, &tset_copy); - _mc_array_append_val(&tsts->prefixArray, tset_copy); // array now owns tset_copy + for (; appendCount > 1; appendCount--) { + _mc_array_append_val(&tsts->prefixArray, tset_copy); // array now owns tset_copy + } } _mc_array_append_val(&tsts->prefixArray, tset); // moves ownership of tset } diff --git a/test/test-mongocrypt-marking.c b/test/test-mongocrypt-marking.c index eb23f0b56..7d4ab28c6 100644 --- a/test/test-mongocrypt-marking.c +++ b/test/test-mongocrypt-marking.c @@ -1536,6 +1536,8 @@ static void test_mc_marking_to_ciphertext_fle2_text_search(_mongocrypt_tester_t _mongocrypt_ciphertext_init(&ciphertext); mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT); text_search_expected_token_counts counts = {0}; + + // beta is 1 for empty strings counts.prefixes = calculate_expected_nfix_tag_count(1, 100, 10); get_ciphertext_from_marking_json(tester, crypt, markingJSON, &ciphertext);