Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ set (MONGOCRYPT_SOURCES
src/mc-range-encoding.c
src/mc-rangeopts.c
src/mc-reader.c
src/mc-text-search-str-encode.c
src/mc-tokens.c
src/mc-writer.c
src/mongocrypt-binary.c
Expand Down Expand Up @@ -474,6 +475,7 @@ set (TEST_MONGOCRYPT_SOURCES
test/test-mc-range-mincover.c
test/test-mc-rangeopts.c
test/test-mc-reader.c
test/test-mc-text-search-str-encode.c
test/test-mc-tokens.c
test/test-mc-range-encoding.c
test/test-mc-writer.c
Expand Down
52 changes: 52 additions & 0 deletions src/mc-fle2-encryption-placeholder-private.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,58 @@ bool mc_FLE2RangeInsertSpec_parse(mc_FLE2RangeInsertSpec_t *out,
bool use_range_v2,
mongocrypt_status_t *status);

// Spec for substring indexing: a maximum indexable string length plus lower and upper bounds on the length of the
// substrings to index. Embedded as the `substr` member of mc_FLE2TextSearchInsertSpec_t.
// NOTE(review): the unit of these lengths (bytes vs. code points) and whether the bounds are inclusive are not stated
// in this header -- confirm against the parser/encoder.
typedef struct {
// mlen is the max string length that can be indexed.
uint32_t mlen;
// lb is the lower bound on the length of substrings to be indexed.
uint32_t lb;
// ub is the upper bound on the length of substrings to be indexed.
uint32_t ub;
} mc_FLE2SubstringInsertSpec_t;

// Spec for suffix indexing: lower and upper bounds on the length of the suffixes to index. Embedded as the `suffix`
// member of mc_FLE2TextSearchInsertSpec_t.
// NOTE(review): the unit of these lengths and whether the bounds are inclusive are not stated in this header --
// confirm against the parser/encoder.
typedef struct {
// lb is the lower bound on the length of suffixes to be indexed.
uint32_t lb;
// ub is the upper bound on the length of suffixes to be indexed.
uint32_t ub;
} mc_FLE2SuffixInsertSpec_t;

// Spec for prefix indexing: lower and upper bounds on the length of the prefixes to index. Embedded as the `prefix`
// member of mc_FLE2TextSearchInsertSpec_t.
// NOTE(review): the unit of these lengths and whether the bounds are inclusive are not stated in this header --
// confirm against the parser/encoder.
typedef struct {
// lb is the lower bound on the length of prefixes to be indexed.
uint32_t lb;
// ub is the upper bound on the length of prefixes to be indexed.
uint32_t ub;
} mc_FLE2PrefixInsertSpec_t;

// Text search insert spec: the value to encrypt, up to three optional indexing specs (substring, suffix, prefix),
// and two folding flags. Each indexing spec is stored next to a `set` flag.
// NOTE(review): `set` presumably records whether that spec was provided, and `value` is presumably meaningful only
// when `set` is true -- confirm against the code that fills this struct.
typedef struct {
// v is the value to encrypt.
const char *v;
// len is the length of v.
// NOTE(review): presumably a byte count that excludes any NUL terminator; ownership of v is not stated in this
// header -- confirm.
uint32_t len;

// substr is the spec for substring indexing.
struct {
mc_FLE2SubstringInsertSpec_t value;
bool set;
} substr;

// suffix is the spec for suffix indexing.
struct {
mc_FLE2SuffixInsertSpec_t value;
bool set;
} suffix;

// prefix is the spec for prefix indexing.
struct {
mc_FLE2PrefixInsertSpec_t value;
bool set;
} prefix;

// casef indicates if case folding is enabled.
bool casef;
// diacf indicates if diacritic folding is enabled.
bool diacf;
} mc_FLE2TextSearchInsertSpec_t;

/** FLE2EncryptionPlaceholder implements Encryption BinData (subtype 6)
* sub-subtype 0, the intent-to-encrypt mapping. Contains a value to encrypt and
* a description of how it should be encrypted.
Expand Down
76 changes: 76 additions & 0 deletions src/mc-text-search-str-encode-private.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright 2024-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MONGOCRYPT_TEXT_SEARCH_STR_ENCODE_PRIVATE_H
#define MONGOCRYPT_TEXT_SEARCH_STR_ENCODE_PRIVATE_H

#include "mc-fle2-encryption-placeholder-private.h"
#include "mongocrypt-status-private.h"
#include "mongocrypt.h"

// Represents a valid Unicode string with the bad character 0xFF appended to the end. This is our base string which
// we build substring trees on. Stores all the valid code points in the string, plus one code point for 0xFF.
// (The byte 0xFF never occurs in well-formed UTF-8, see RFC 3629.)
// Exposed for testing.
typedef struct {
// String bytes.
// NOTE(review): presumably the UTF-8 input followed by the single 0xFF byte; ownership is not stated here -- confirm.
char *data;
// Length of data.
// NOTE(review): presumably in bytes and including the trailing 0xFF -- confirm.
uint32_t len;
// NOTE(review): presumably the offset into data at which each code point starts, one entry per code point -- confirm.
uint32_t *codepoint_offsets;
// Number of code points. Per the description above, this covers the valid code points plus one for 0xFF.
uint32_t codepoint_len;
} mc_utf8_string_with_bad_char_t;

// Set of substrings of a shared base string.
// Opaque in this header: only the struct tag is declared, so callers handle it by pointer and read it through
// mc_substring_set_iter_t.
typedef struct _mc_substring_set_t mc_substring_set_t;

// Iterator on substring_set. Set up with mc_substring_set_iter_init and advanced with mc_substring_set_iter_next.
typedef struct {
// Set being iterated.
// NOTE(review): presumably borrowed, not owned, by the iterator -- confirm.
mc_substring_set_t *set;
// Current position within the set.
// NOTE(review): presumably the index of the element the next call to mc_substring_set_iter_next returns -- confirm.
uint32_t cur_idx;
} mc_substring_set_iter_t;

// Point the iterator to the first substring of the given set.
//   it:  iterator to initialize.
//   set: set to iterate over.
// NOTE(review): behavior for a NULL `it` or `set` is not visible in this header -- confirm before passing either.
void mc_substring_set_iter_init(mc_substring_set_iter_t *it, mc_substring_set_t *set);

// Get the next substring, its length, and its count. Returns false if the set does not have a next element, true
// otherwise.
//   it:    iterator previously set up with mc_substring_set_iter_init.
//   str:   receives a pointer to the substring.
//   len:   receives the length of the substring.
//   count: receives the count of the substring.
// NOTE(review): *str presumably points into the shared base string (so it is not NUL-terminated and must not be freed
// by the caller), and `len` is presumably a byte count -- confirm. Whether NULL may be passed for an output that is not
// needed is not visible in this header.
bool mc_substring_set_iter_next(mc_substring_set_iter_t *it, const char **str, uint32_t *len, uint32_t *count);

// Result of a StrEncode. Contains the computed prefix, suffix, and substring trees, or NULL if empty, as well as the
// exact string. Returned by mc_text_search_str_encode and passed to mc_str_encode_sets_destroy.
typedef struct {
// Base string which the substring sets point to.
mc_utf8_string_with_bad_char_t *base_string;
// Set of encoded suffixes. NULL if empty.
mc_substring_set_t *suffix_set;
// Set of encoded prefixes. NULL if empty.
mc_substring_set_t *prefix_set;
// Set of encoded substrings. NULL if empty.
mc_substring_set_t *substring_set;
// Encoded exact string.
char *exact;
// Length of exact.
// NOTE(review): presumably a byte count; note this is size_t while the other lengths in this header are uint32_t --
// confirm.
size_t exact_len;
} mc_str_encode_sets_t;

// Run StrEncode with the given spec.
//   spec:   text search insert spec holding the value and the indexing options.
//   status: status object.
// Returns a pointer to the resulting mc_str_encode_sets_t.
// NOTE(review): presumably returns NULL and records the error in `status` on failure, and the caller presumably
// releases the result with mc_str_encode_sets_destroy -- confirm against the implementation.
mc_str_encode_sets_t *mc_text_search_str_encode(const mc_FLE2TextSearchInsertSpec_t *spec, mongocrypt_status_t *status);

// Variant of mc_text_search_str_encode that takes the unfolded length as an explicit argument.
// TODO MONGOCRYPT-759 This helper only exists to test folded_len != unfolded_len; make the test actually use folding
//   spec:         text search insert spec holding the value and the indexing options.
//   unfolded_len: length of the string before folding, supplied by the caller.
//   status:       status object.
// NOTE(review): the unit of `unfolded_len` (bytes vs. code points) and the failure/ownership contract are not visible
// in this header -- presumably the same as mc_text_search_str_encode; confirm.
mc_str_encode_sets_t *mc_text_search_str_encode_helper(const mc_FLE2TextSearchInsertSpec_t *spec,
uint32_t unfolded_len,
mongocrypt_status_t *status);

// Destroy a mc_str_encode_sets_t.
// NOTE(review): presumably frees the base string, the three substring sets, the exact string, and `sets` itself;
// whether a NULL `sets` is accepted is not visible in this header -- confirm against the implementation.
void mc_str_encode_sets_destroy(mc_str_encode_sets_t *sets);

#endif /* MONGOCRYPT_TEXT_SEARCH_STR_ENCODE_PRIVATE_H */
Loading
Loading