diff --git a/.github/workflows/encrypted_settings.py b/.github/workflows/encrypted_settings.py new file mode 100644 index 000000000..0436b04f7 --- /dev/null +++ b/.github/workflows/encrypted_settings.py @@ -0,0 +1,43 @@ +# Settings for django_mongodb_backend/tests when encryption is supported. +import os + +from mongodb_settings import * # noqa: F403 +from pymongo.encryption import AutoEncryptionOpts + +os.environ["LD_LIBRARY_PATH"] = os.environ["GITHUB_WORKSPACE"] + "/lib/" + +DATABASES["encrypted"] = { # noqa: F405 + "ENGINE": "django_mongodb_backend", + "NAME": "djangotests_encrypted", + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + key_vault_namespace="djangotests_encrypted.__keyVault", + kms_providers={"local": {"key": os.urandom(96)}}, + crypt_shared_lib_path=os.environ["GITHUB_WORKSPACE"] + "/lib/mongo_crypt_v1.so", + ), + "directConnection": True, + }, + "KMS_CREDENTIALS": {}, +} + + +class EncryptedRouter: + def db_for_read(self, model, **hints): + if model._meta.app_label == "encryption_": + return "encrypted" + return None + + db_for_write = db_for_read + + def allow_migrate(self, db, app_label, model_name=None, **hints): + # The encryption_ app's models are only created in the encrypted + # database. + if app_label == "encryption_": + return db == "encrypted" + # Don't create other app's models in the encrypted database. + if db == "encrypted": + return False + return None + + +DATABASE_ROUTERS.append(EncryptedRouter()) # noqa: F405 diff --git a/.github/workflows/mongodb_settings.py b/.github/workflows/mongodb_settings.py index 4dce3c0d5..619bdcd95 100644 --- a/.github/workflows/mongodb_settings.py +++ b/.github/workflows/mongodb_settings.py @@ -1,4 +1,5 @@ -# Settings for django_mongodb_backend/tests. +# Settings for django_mongodb_backend/tests when encryption isn't supported. 
from django_settings import * # noqa: F403 +DATABASES["encrypted"] = {} # noqa: F405 DATABASE_ROUTERS = ["django_mongodb_backend.routers.MongoRouter"] diff --git a/.github/workflows/runtests.py b/.github/workflows/runtests.py index cc258f363..3775c422b 100755 --- a/.github/workflows/runtests.py +++ b/.github/workflows/runtests.py @@ -6,151 +6,6 @@ from django.core.exceptions import ImproperlyConfigured test_apps = [ - "admin_changelist", - "admin_checks", - "admin_custom_urls", - "admin_docs", - "admin_filters", - "admin_inlines", - "admin_ordering", - "admin_scripts", - "admin_utils", - "admin_views", - "admin_widgets", - "aggregation", - "aggregation_regress", - "annotations", - "apps", - "async", - "auth_tests", - "backends", - "basic", - "bulk_create", - "cache", - "check_framework", - "constraints", - "contenttypes_tests", - "context_processors", - "custom_columns", - "custom_lookups", - "custom_managers", - "custom_pk", - "datatypes", - "dates", - "datetimes", - "db_functions", - "defer", - "defer_regress", - "delete", - "delete_regress", - "empty", - "empty_models", - "expressions", - "expressions_case", - "field_defaults", - "file_storage", - "file_uploads", - "fixtures", - "fixtures_model_package", - "fixtures_regress", - "flatpages_tests", - "force_insert_update", - "foreign_object", - "forms_tests", - "from_db_value", - "generic_inline_admin", - "generic_relations", - "generic_relations_regress", - "generic_views", - "get_earliest_or_latest", - "get_object_or_404", - "get_or_create", - "i18n", - "indexes", - "inline_formsets", - "introspection", - "invalid_models_tests", - "known_related_objects", - "lookup", - "m2m_and_m2o", - "m2m_intermediary", - "m2m_multiple", - "m2m_recursive", - "m2m_regress", - "m2m_signals", - "m2m_through", - "m2m_through_regress", - "m2o_recursive", - "managers_regress", - "many_to_many", - "many_to_one", - "many_to_one_null", - "max_lengths", - "messages_tests", - "migrate_signals", - "migration_test_data_persistence", - 
"migrations", - "model_fields", - "model_forms", - "model_formsets", - "model_formsets_regress", - "model_indexes", - "model_inheritance", - "model_inheritance_regress", - "model_options", - "model_package", - "model_regress", - "model_utils", - "modeladmin", - "multiple_database", - "mutually_referential", - "nested_foreign_keys", - "null_fk", - "null_fk_ordering", - "null_queries", - "one_to_one", - "or_lookups", - "order_with_respect_to", - "ordering", - "pagination", - "prefetch_related", - "proxy_model_inheritance", - "proxy_models", - "queries", - "queryset_pickle", - "redirects_tests", - "reserved_names", - "reverse_lookup", - "save_delete_hooks", - "schema", - "select_for_update", - "select_related", - "select_related_onetoone", - "select_related_regress", - "serializers", - "servers", - "sessions_tests", - "shortcuts", - "signals", - "sitemaps_tests", - "sites_framework", - "sites_tests", - "string_lookup", - "swappable_models", - "syndication_tests", - "test_client", - "test_client_regress", - "test_runner", - "test_utils", - "timezones", - "transactions", - "unmanaged_models", - "update", - "update_only_fields", - "user_commands", - "validation", - "view_tests", - "xor_lookups", # Add directories in django_mongodb_backend/tests *sorted( [ diff --git a/.github/workflows/test-python-atlas.yml b/.github/workflows/test-python-atlas.yml index e98d2512d..5f8495130 100644 --- a/.github/workflows/test-python-atlas.yml +++ b/.github/workflows/test-python-atlas.yml @@ -28,7 +28,7 @@ jobs: - name: install django-mongodb-backend run: | pip3 install --upgrade pip - pip3 install -e . + pip3 install -e .[encryption] - name: Checkout Django uses: actions/checkout@v5 with: @@ -51,8 +51,15 @@ jobs: run: cp .github/workflows/runtests.py django_repo/tests/runtests_.py - name: Start local Atlas working-directory: . 
- run: bash .github/workflows/start_local_atlas.sh mongodb/mongodb-atlas-local:7 + run: bash .github/workflows/start_local_atlas.sh mongodb/mongodb-atlas-local:8.0.15 + - name: Download crypt shared + run: | + wget https://downloads.mongodb.com/linux/mongo_crypt_shared_v1-linux-x86_64-enterprise-ubuntu2404-8.0.15.tgz + tar -xvzf mongo_crypt_shared_v1-linux-x86_64-enterprise-ubuntu2404-8.0.15.tgz lib/mongo_crypt_v1.so + ls -d "$PWD"/lib/mongo_crypt_v1.so - name: Run tests run: python3 django_repo/tests/runtests_.py permissions: contents: read + env: + DJANGO_SETTINGS_MODULE: "encrypted_settings" diff --git a/.github/workflows/test-python-geo.yml b/.github/workflows/test-python-geo.yml deleted file mode 100644 index 309f3506a..000000000 --- a/.github/workflows/test-python-geo.yml +++ /dev/null @@ -1,60 +0,0 @@ -# Identical to test-python.yml except that gdal-bin is also installed. -name: Python Tests with GeoDjango - -on: - pull_request: - paths: - - '**.py' - - '!setup.py' - - '.github/workflows/test-python-geo.yml' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -defaults: - run: - shell: bash -eux {0} - -jobs: - build: - name: Django Test Suite - runs-on: ubuntu-latest - steps: - - name: Checkout django-mongodb-backend - uses: actions/checkout@v5 - with: - persist-credentials: false - - name: install django-mongodb-backend - run: | - pip3 install --upgrade pip - pip3 install -e . - - name: Checkout Django - uses: actions/checkout@v5 - with: - repository: 'mongodb-forks/django' - ref: 'mongodb-5.2.x' - path: 'django_repo' - persist-credentials: false - - name: Install system packages for Django's Python test dependencies - run: | - sudo apt-get update - sudo apt-get install gdal-bin libmemcached-dev - - name: Install Django and its Python test dependencies - run: | - cd django_repo/tests/ - pip3 install -e .. 
- pip3 install -r requirements/py3.txt - - name: Copy the test settings files - run: cp .github/workflows/*_settings.py django_repo/tests/ - - name: Copy the test runner file - run: cp .github/workflows/runtests.py django_repo/tests/runtests_.py - - name: Start MongoDB - uses: supercharge/mongodb-github-action@90004df786821b6308fb02299e5835d0dae05d0d # 1.12.0 - with: - mongodb-version: 6.0 - - name: Run tests - run: python3 django_repo/tests/runtests_.py - permissions: - contents: read diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml deleted file mode 100644 index 7f74b3376..000000000 --- a/.github/workflows/test-python.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Python Tests - -on: - pull_request: - paths: - - '**.py' - - '!setup.py' - - '.github/workflows/test-python.yml' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -defaults: - run: - shell: bash -eux {0} - -jobs: - build: - name: Django Test Suite - runs-on: ubuntu-latest - steps: - - name: Checkout django-mongodb-backend - uses: actions/checkout@v5 - with: - persist-credentials: false - - name: install django-mongodb-backend - run: | - pip3 install --upgrade pip - pip3 install -e . - - name: Checkout Django - uses: actions/checkout@v5 - with: - repository: 'mongodb-forks/django' - ref: 'mongodb-5.2.x' - path: 'django_repo' - persist-credentials: false - - name: Install system packages for Django's Python test dependencies - run: | - sudo apt-get update - sudo apt-get install libmemcached-dev - - name: Install Django and its Python test dependencies - run: | - cd django_repo/tests/ - pip3 install -e .. 
- pip3 install -r requirements/py3.txt - - name: Copy the test settings files - run: cp .github/workflows/*_settings.py django_repo/tests/ - - name: Copy the test runner file - run: cp .github/workflows/runtests.py django_repo/tests/runtests_.py - - name: Start MongoDB - uses: supercharge/mongodb-github-action@90004df786821b6308fb02299e5835d0dae05d0d # 1.12.0 - with: - mongodb-version: 6.0 - - name: Run tests - run: python3 django_repo/tests/runtests_.py - permissions: - contents: read diff --git a/django_mongodb_backend/__init__.py b/django_mongodb_backend/__init__.py index 577a4f104..752d72802 100644 --- a/django_mongodb_backend/__init__.py +++ b/django_mongodb_backend/__init__.py @@ -14,6 +14,7 @@ from .indexes import register_indexes # noqa: E402 from .lookups import register_lookups # noqa: E402 from .query import register_nodes # noqa: E402 +from .routers import register_routers # noqa: E402 __all__ = ["parse_uri"] @@ -25,3 +26,4 @@ register_indexes() register_lookups() register_nodes() +register_routers() diff --git a/django_mongodb_backend/base.py b/django_mongodb_backend/base.py index 88c2a1189..b1afc1b03 100644 --- a/django_mongodb_backend/base.py +++ b/django_mongodb_backend/base.py @@ -11,6 +11,7 @@ from django.utils.functional import cached_property from pymongo.collection import Collection from pymongo.driver_info import DriverInfo +from pymongo.encryption import ClientEncryption from pymongo.mongo_client import MongoClient from pymongo.uri_parser import parse_uri @@ -241,6 +242,16 @@ def get_database(self): return OperationDebugWrapper(self) return self.database + @cached_property + def client_encryption(self): + auto_encryption_opts = self.connection._options.auto_encryption_opts + return ClientEncryption( + auto_encryption_opts._kms_providers, + auto_encryption_opts._key_vault_namespace, + self.connection, + self.connection.codec_options, + ) + @cached_property def database(self): """Connect to the database the first time it's accessed.""" diff 
--git a/django_mongodb_backend/creation.py b/django_mongodb_backend/creation.py index c8002b2c4..a1d45277e 100644 --- a/django_mongodb_backend/creation.py +++ b/django_mongodb_backend/creation.py @@ -1,5 +1,5 @@ from django.conf import settings -from django.db.backends.base.creation import BaseDatabaseCreation +from django.db.backends.base.creation import TEST_DATABASE_PREFIX, BaseDatabaseCreation class DatabaseCreation(BaseDatabaseCreation): @@ -7,6 +7,14 @@ def _execute_create_test_db(self, cursor, parameters, keepdb=False): # Close the connection (which may point to the non-test database) so # that a new connection to the test database can be established later. self.connection.close_pool() + # Use a test _key_vault_namespace. This assumes the key vault database + # is the same as the encrypted database so that _destroy_test_db() can + # reset the collection by dropping it. + opts = self.connection.settings_dict["OPTIONS"].get("auto_encryption_opts") + if opts: + self.connection.settings_dict["OPTIONS"][ + "auto_encryption_opts" + ]._key_vault_namespace = TEST_DATABASE_PREFIX + opts._key_vault_namespace if not keepdb: self._destroy_test_db(parameters["dbname"], verbosity=0) @@ -24,3 +32,9 @@ def destroy_test_db(self, old_database_name=None, verbosity=1, keepdb=False, suf super().destroy_test_db(old_database_name, verbosity, keepdb, suffix) # Close the connection to the test database. self.connection.close_pool() + # Restore the original _key_vault_namespace. 
+ opts = self.connection.settings_dict["OPTIONS"].get("auto_encryption_opts") + if opts: + self.connection.settings_dict["OPTIONS"][ + "auto_encryption_opts" + ]._key_vault_namespace = opts._key_vault_namespace[len(TEST_DATABASE_PREFIX) :] diff --git a/django_mongodb_backend/features.py b/django_mongodb_backend/features.py index 18a048bf6..6f4c1e8f5 100644 --- a/django_mongodb_backend/features.py +++ b/django_mongodb_backend/features.py @@ -588,9 +588,21 @@ def django_test_skips(self): skips.update(self._django_test_skips) return skips + @cached_property + def mongodb_version(self): + return self.connection.get_database_version() # e.g., (6, 3, 0) + @cached_property def is_mongodb_6_3(self): - return self.connection.get_database_version() >= (6, 3) + return self.mongodb_version >= (6, 3) + + @cached_property + def is_mongodb_7_0(self): + return self.mongodb_version >= (7, 0) + + @cached_property + def is_mongodb_8_0(self): + return self.mongodb_version >= (8, 0) @cached_property def supports_atlas_search(self): @@ -620,3 +632,18 @@ def _supports_transactions(self): hello = client.command("hello") # a replica set or a sharded cluster return "setName" in hello or hello.get("msg") == "isdbgrid" + + @cached_property + def supports_queryable_encryption(self): + """ + Queryable Encryption requires a MongoDB 8.0 or later replica set or sharded + cluster, as well as MongoDB Atlas or Enterprise. 
+ """ + self.connection.ensure_connection() + build_info = self.connection.connection.admin.command("buildInfo") + is_enterprise = "enterprise" in build_info.get("modules") + return ( + (is_enterprise or self.supports_atlas_search) + and self._supports_transactions + and self.is_mongodb_8_0 + ) diff --git a/django_mongodb_backend/fields/__init__.py b/django_mongodb_backend/fields/__init__.py index 0c95afd69..6cc4bcc18 100644 --- a/django_mongodb_backend/fields/__init__.py +++ b/django_mongodb_backend/fields/__init__.py @@ -3,6 +3,33 @@ from .duration import register_duration_field from .embedded_model import EmbeddedModelField from .embedded_model_array import EmbeddedModelArrayField +from .encryption import ( + EncryptedArrayField, + EncryptedBigIntegerField, + EncryptedBinaryField, + EncryptedBooleanField, + EncryptedCharField, + EncryptedDateField, + EncryptedDateTimeField, + EncryptedDecimalField, + EncryptedDurationField, + EncryptedEmailField, + EncryptedEmbeddedModelArrayField, + EncryptedEmbeddedModelField, + EncryptedFieldMixin, + EncryptedFloatField, + EncryptedGenericIPAddressField, + EncryptedIntegerField, + EncryptedObjectIdField, + EncryptedPositiveBigIntegerField, + EncryptedPositiveIntegerField, + EncryptedPositiveSmallIntegerField, + EncryptedSmallIntegerField, + EncryptedTextField, + EncryptedTimeField, + EncryptedURLField, + EncryptedUUIDField, +) from .json import register_json_field from .objectid import ObjectIdField from .polymorphic_embedded_model import PolymorphicEmbeddedModelField @@ -12,6 +39,31 @@ "ArrayField", "EmbeddedModelArrayField", "EmbeddedModelField", + "EncryptedArrayField", + "EncryptedBigIntegerField", + "EncryptedBinaryField", + "EncryptedBooleanField", + "EncryptedCharField", + "EncryptedDateField", + "EncryptedDateTimeField", + "EncryptedDecimalField", + "EncryptedDurationField", + "EncryptedEmailField", + "EncryptedEmbeddedModelArrayField", + "EncryptedEmbeddedModelField", + "EncryptedFieldMixin", + "EncryptedFloatField", 
+ "EncryptedGenericIPAddressField", + "EncryptedIntegerField", + "EncryptedObjectIdField", + "EncryptedPositiveBigIntegerField", + "EncryptedPositiveIntegerField", + "EncryptedPositiveSmallIntegerField", + "EncryptedSmallIntegerField", + "EncryptedTextField", + "EncryptedTimeField", + "EncryptedURLField", + "EncryptedUUIDField", "ObjectIdAutoField", "ObjectIdField", "PolymorphicEmbeddedModelArrayField", diff --git a/django_mongodb_backend/fields/encryption.py b/django_mongodb_backend/fields/encryption.py new file mode 100644 index 000000000..3ced82769 --- /dev/null +++ b/django_mongodb_backend/fields/encryption.py @@ -0,0 +1,139 @@ +from django.db import models + +from django_mongodb_backend.fields import ArrayField, EmbeddedModelArrayField, EmbeddedModelField +from django_mongodb_backend.fields.objectid import ObjectIdField + + +class EncryptedFieldMixin: + encrypted = True + + def __init__(self, *args, queries=None, db_index=False, null=False, unique=False, **kwargs): + if db_index: + raise ValueError("'db_index=True' is not supported on encrypted fields.") + if null: + raise ValueError("'null=True' is not supported on encrypted fields.") + if unique: + raise ValueError("'unique=True' is not supported on encrypted fields.") + self.queries = queries + super().__init__(*args, **kwargs) + + def deconstruct(self): + name, path, args, kwargs = super().deconstruct() + + if self.queries is not None: + kwargs["queries"] = self.queries + + if path.startswith("django_mongodb_backend.fields.encryption"): + path = path.replace( + "django_mongodb_backend.fields.encryption", + "django_mongodb_backend.fields", + ) + + return name, path, args, kwargs + + +class NoQueriesMixin: + def __init__(self, *args, **kwargs): + if "queries" in kwargs: + raise ValueError(f"{self.__class__.__name__} does not support the queries argument.") + super().__init__(*args, **kwargs) + + +# Django fields +class EncryptedBinaryField(EncryptedFieldMixin, models.BinaryField): + pass + + +class 
EncryptedBigIntegerField(EncryptedFieldMixin, models.BigIntegerField): + pass + + +class EncryptedBooleanField(EncryptedFieldMixin, models.BooleanField): + pass + + +class EncryptedCharField(EncryptedFieldMixin, models.CharField): + pass + + +class EncryptedDateField(EncryptedFieldMixin, models.DateField): + pass + + +class EncryptedDateTimeField(EncryptedFieldMixin, models.DateTimeField): + pass + + +class EncryptedDecimalField(EncryptedFieldMixin, models.DecimalField): + pass + + +class EncryptedDurationField(EncryptedFieldMixin, models.DurationField): + pass + + +class EncryptedEmailField(EncryptedFieldMixin, models.EmailField): + pass + + +class EncryptedFloatField(EncryptedFieldMixin, models.FloatField): + pass + + +class EncryptedGenericIPAddressField(EncryptedFieldMixin, models.GenericIPAddressField): + pass + + +class EncryptedIntegerField(EncryptedFieldMixin, models.IntegerField): + pass + + +class EncryptedPositiveBigIntegerField(EncryptedFieldMixin, models.PositiveBigIntegerField): + pass + + +class EncryptedPositiveIntegerField(EncryptedFieldMixin, models.PositiveIntegerField): + pass + + +class EncryptedPositiveSmallIntegerField(EncryptedFieldMixin, models.PositiveSmallIntegerField): + pass + + +class EncryptedSmallIntegerField(EncryptedFieldMixin, models.SmallIntegerField): + pass + + +class EncryptedTextField(EncryptedFieldMixin, models.TextField): + pass + + +class EncryptedTimeField(EncryptedFieldMixin, models.TimeField): + pass + + +class EncryptedURLField(EncryptedFieldMixin, models.URLField): + pass + + +class EncryptedUUIDField(EncryptedFieldMixin, models.UUIDField): + pass + + +# MongoDB fields +class EncryptedArrayField(NoQueriesMixin, EncryptedFieldMixin, ArrayField): + pass + + +class EncryptedEmbeddedModelArrayField( + NoQueriesMixin, EncryptedFieldMixin, EmbeddedModelArrayField +): + pass + + +class EncryptedEmbeddedModelField(NoQueriesMixin, EncryptedFieldMixin, EmbeddedModelField): + pass + + +class 
EncryptedObjectIdField(EncryptedFieldMixin, ObjectIdField): + pass diff --git a/django_mongodb_backend/management/commands/showencryptedfieldsmap.py b/django_mongodb_backend/management/commands/showencryptedfieldsmap.py new file mode 100644 index 000000000..017fabde5 --- /dev/null +++ b/django_mongodb_backend/management/commands/showencryptedfieldsmap.py @@ -0,0 +1,35 @@ +from bson import json_util +from django.apps import apps +from django.core.management.base import BaseCommand +from django.db import DEFAULT_DB_ALIAS, connections, router + +from django_mongodb_backend.utils import model_has_encrypted_fields + + +class Command(BaseCommand): + help = """ + Shows the mapping of encrypted fields to field attributes, including data + type, data keys and query types. The output can be used to set + ``encrypted_fields_map`` in ``AutoEncryptionOpts``. + """ + + def add_arguments(self, parser): + parser.add_argument( + "--database", + default=DEFAULT_DB_ALIAS, + help=""" + Specifies the database to use. 
Defaults to ``default``.""", + ) + + def handle(self, *args, **options): + db = options["database"] + connection = connections[db] + connection.ensure_connection() + encrypted_fields_map = {} + with connection.schema_editor() as editor: + for app_config in apps.get_app_configs(): + for model in router.get_migratable_models(app_config, db): + if model_has_encrypted_fields(model): + fields = editor._get_encrypted_fields(model, create_data_keys=False) + encrypted_fields_map[model._meta.db_table] = fields + self.stdout.write(json_util.dumps(encrypted_fields_map, indent=4)) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index c743ca8bf..8f295f8e7 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -24,7 +24,7 @@ def wrapper(*args, **kwargs): except DuplicateKeyError as e: raise IntegrityError from e except PyMongoError as e: - raise DatabaseError from e + raise DatabaseError(str(e)) from e return wrapper diff --git a/django_mongodb_backend/routers.py b/django_mongodb_backend/routers.py index 60e54bbd8..b17f4b021 100644 --- a/django_mongodb_backend/routers.py +++ b/django_mongodb_backend/routers.py @@ -1,6 +1,6 @@ from django.apps import apps - -from django_mongodb_backend.models import EmbeddedModel +from django.core.exceptions import ImproperlyConfigured +from django.db.utils import ConnectionRouter class MongoRouter: @@ -9,6 +9,8 @@ def allow_migrate(self, db, app_label, model_name=None, **hints): EmbeddedModels don't have their own collection and must be ignored by dumpdata. """ + from django_mongodb_backend.models import EmbeddedModel # noqa: PLC0415 + if not model_name: return None try: @@ -16,3 +18,22 @@ def allow_migrate(self, db, app_label, model_name=None, **hints): except LookupError: return None return False if issubclass(model, EmbeddedModel) else None + + +# This function is intended to be monkey-patched as a method of ConnectionRouter. 
+def kms_provider(self, model, *args, **kwargs): + """ + Return the Key Management Service (KMS) provider for a given model. + + Call each router's kms_provider() method (if present), and return the + first non-None result. Raise ImproperlyConfigured if no provider is found. + """ + for router in self.routers: + func = getattr(router, "kms_provider", None) + if func and callable(func) and (result := func(model, *args, **kwargs)): + return result + raise ImproperlyConfigured("No kms_provider found in database routers.") + + +def register_routers(): + ConnectionRouter.kms_provider = kms_provider diff --git a/django_mongodb_backend/schema.py b/django_mongodb_backend/schema.py index 9bcaecc63..76abab836 100644 --- a/django_mongodb_backend/schema.py +++ b/django_mongodb_backend/schema.py @@ -1,5 +1,7 @@ from time import monotonic, sleep +from django.core.exceptions import ImproperlyConfigured +from django.db import router from django.db.backends.base.schema import BaseDatabaseSchemaEditor from django.db.models import Index, UniqueConstraint from pymongo.operations import SearchIndexModel @@ -9,7 +11,7 @@ from .fields import EmbeddedModelField from .gis.schema import GISSchemaEditor from .query import wrap_database_errors -from .utils import OperationCollector +from .utils import OperationCollector, model_has_encrypted_fields def ignore_embedded_models(func): @@ -44,7 +46,7 @@ def get_database(self): @wrap_database_errors @ignore_embedded_models def create_model(self, model): - self.get_database().create_collection(model._meta.db_table) + self._create_collection(model) self._create_model_indexes(model) # Make implicit M2M tables. for field in model._meta.local_many_to_many: @@ -452,6 +454,106 @@ def wait_until_index_dropped(collection, index_name, timeout=60, interval=0.5): sleep(interval) raise TimeoutError(f"Index {index_name} not dropped after {timeout} seconds.") + def _create_collection(self, model): + """ + Create a collection for the model. 
+ If the model has encrypted fields, build (or retrieve) the encrypted_fields schema. + """ + db = self.get_database() + db_table = model._meta.db_table + + if model_has_encrypted_fields(model): + # Encrypted path + client = self.connection.connection + auto_encryption_opts = getattr(client._options, "auto_encryption_opts", None) + if not auto_encryption_opts: + raise ImproperlyConfigured( + f"Encrypted fields found but DATABASES['{self.connection.alias}']['OPTIONS'] " + "is missing auto_encryption_opts." + ) + encrypted_fields = self._get_encrypted_fields(model) + db.create_collection(db_table, encryptedFields=encrypted_fields) + else: + # Unencrypted path + db.create_collection(db_table) + + def _get_encrypted_fields( + self, model, *, key_alt_name_prefix=None, path_prefix=None, create_data_keys=True + ): + """ + Return the encrypted fields map for the given model. The "prefix" + arguments are used when this method is called recursively on embedded + models. + """ + connection = self.connection + client = connection.connection + key_alt_name_prefix = key_alt_name_prefix or model._meta.db_table + path_prefix = path_prefix or "" + auto_encryption_opts = client._options.auto_encryption_opts + _, key_vault_collection = auto_encryption_opts._key_vault_namespace.split(".", 1) + key_vault = self.get_collection(key_vault_collection) + # Create partial unique index on keyAltNames. + # TODO: find a better place for this. It only needs to run once for an + # application's lifetime. + key_vault.create_index( + "keyAltNames", unique=True, partialFilterExpression={"keyAltNames": {"$exists": True}} + ) + # Select the KMS provider. + kms_providers = auto_encryption_opts._kms_providers + if len(kms_providers) == 1: + # If one provider is configured, no need to consult the router. + kms_provider = next(iter(kms_providers.keys())) + else: + # Otherwise, call the user-defined router.kms_provider(). 
+ kms_provider = router.kms_provider(model) + if kms_provider == "local": + master_key = None + else: + master_key = connection.settings_dict["KMS_CREDENTIALS"][kms_provider] + # Generate the encrypted fields map. + field_list = [] + for field in model._meta.fields: + key_alt_name = f"{key_alt_name_prefix}.{field.column}" + path = f"{path_prefix}.{field.column}" if path_prefix else field.column + # Check non-encrypted EmbeddedModelFields for encrypted fields. + if isinstance(field, EmbeddedModelField) and not getattr(field, "encrypted", False): + embedded_result = self._get_encrypted_fields( + field.embedded_model, + key_alt_name_prefix=key_alt_name, + path_prefix=path, + create_data_keys=create_data_keys, + ) + # An EmbeddedModelField may not have any encrypted fields. + if embedded_result: + field_list.extend(embedded_result["fields"]) + continue + # Populate data for encrypted field. + if getattr(field, "encrypted", False): + if create_data_keys: + data_key = connection.client_encryption.create_data_key( + kms_provider=kms_provider, + key_alt_names=[key_alt_name], + master_key=master_key, + ) + else: + data_key = key_vault.find_one({"keyAltNames": key_alt_name}) + if data_key: + data_key = data_key["_id"] + else: + raise ImproperlyConfigured( + f"Encryption key {key_alt_name} not found. Have " + f"migrated the {model} model?" + ) + field_dict = { + "bsonType": field.db_type(connection), + "path": path, + "keyId": data_key, + } + if queries := getattr(field, "queries", None): + field_dict["queries"] = queries + field_list.append(field_dict) + return {"fields": field_list} + # GISSchemaEditor extends some SchemaEditor methods. 
class DatabaseSchemaEditor(GISSchemaEditor, BaseSchemaEditor): diff --git a/django_mongodb_backend/utils.py b/django_mongodb_backend/utils.py index 0240250cf..c655c8bc0 100644 --- a/django_mongodb_backend/utils.py +++ b/django_mongodb_backend/utils.py @@ -118,6 +118,7 @@ class OperationDebugWrapper: "create_indexes", "create_search_index", "drop", + "find_one", "index_information", "insert_many", "delete_many", @@ -193,3 +194,23 @@ def wrapper(self, *args, **kwargs): self.log(method, args, kwargs) return wrapper + + +def model_has_encrypted_fields(model): + """ + Recursively check if this model or any embedded models contain encrypted fields. + Returns True if encryption is found anywhere in the hierarchy. + """ + from django_mongodb_backend.fields import EmbeddedModelField # noqa: PLC0415 + + for field in model._meta.fields: + if getattr(field, "encrypted", False): + return True + + # Recursively check embedded models. + if isinstance(field, EmbeddedModelField) and model_has_encrypted_fields( + field.embedded_model + ): + return True + + return False diff --git a/docs/howto/index.rst b/docs/howto/index.rst index 95d7ef632..8451960ef 100644 --- a/docs/howto/index.rst +++ b/docs/howto/index.rst @@ -11,3 +11,4 @@ Project configuration :maxdepth: 1 contrib-apps + queryable-encryption diff --git a/docs/howto/queryable-encryption.rst b/docs/howto/queryable-encryption.rst new file mode 100644 index 000000000..ab827fb0e --- /dev/null +++ b/docs/howto/queryable-encryption.rst @@ -0,0 +1,313 @@ +================================ +Configuring Queryable Encryption +================================ + +.. versionadded:: 5.2.3 + +:doc:`manual:core/queryable-encryption` is a powerful MongoDB feature that +allows you to encrypt sensitive fields in your database while still supporting +queries on that encrypted data. + +This section will guide you through the process of configuring Queryable +Encryption in your Django project. + +.. 
admonition:: MongoDB requirements + + Queryable Encryption can be used with MongoDB replica sets or sharded + clusters running version 8.0 or later. Standalone instances are not + supported. The :ref:`manual:qe-compatibility-reference` table summarizes + which MongoDB server products support Queryable Encryption. + +Installation +============ + +In addition to Django MongoDB Backend's regular :doc:`installation +` and :doc:`configuration ` steps, Queryable +Encryption has additional Python dependencies: + +.. code-block:: console + + $ pip install django-mongodb-backend[encryption] + +.. _qe-configuring-databases-setting: + +Configuring the ``DATABASES`` setting +===================================== + +In addition to the :ref:`database settings ` +required to use Django MongoDB Backend, Queryable Encryption requires you to +configure a separate encrypted database connection in your +:setting:`django:DATABASES` setting. + +.. admonition:: Encrypted database + + An encrypted database is a separate database connection in your + :setting:`django:DATABASES` setting that is configured to use PyMongo's + :class:`automatic encryption + `. + +Here's how to configure an encrypted database using a local KMS provider and +encryption keys stored in the ``encryption.__keyVault`` collection:: + + import os + + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "default": { + "ENGINE": "django_mongodb_backend", + "HOST": "mongodb+srv://cluster0.example.mongodb.net", + "NAME": "my_database", + # ... + }, + "encrypted": { + "ENGINE": "django_mongodb_backend", + "HOST": "mongodb+srv://cluster0.example.mongodb.net", + "NAME": "my_database_encrypted", + "USER": "my_user", + "PASSWORD": "my_password", + "PORT": 27017, + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + key_vault_namespace="encryption.__keyVault", + kms_providers={"local": {"key": os.urandom(96)}}, + ) + }, + }, + } + +.. 
admonition:: Local KMS provider key + + In the example above, a random key is generated for the local KMS provider + using ``os.urandom(96)``. In a production environment, you should securely + :ref:`store and manage your encryption keys + `. + +.. _qe-configuring-database-routers-setting: + +Configuring the ``DATABASE_ROUTERS`` setting +============================================ + +Similar to configuring the :ref:`DATABASE_ROUTERS +` setting for +:doc:`embedded models `, Queryable Encryption +requires a :setting:`DATABASE_ROUTERS ` setting to +route database operations to the encrypted database. + +The following example shows how to configure a router for the "myapp" +application that routes database operations to the encrypted database for all +models in that application:: + + # myapp/routers.py + class EncryptedRouter: + def allow_migrate(self, db, app_label, model_name=None, **hints): + if app_label == "myapp": + return db == "encrypted" + # Prevent migrations on the encrypted database for other apps + if db == "encrypted": + return False + return None + + def db_for_read(self, model, **hints): + if model._meta.app_label == "myapp": + return "encrypted" + return None + + db_for_write = db_for_read + +Then in your Django settings, add the custom database router to the +:setting:`django:DATABASE_ROUTERS` setting:: + + # settings.py + DATABASE_ROUTERS = ["myapp.routers.EncryptedRouter"] + +.. _qe-configuring-kms: + +Configuring the Key Management Service (KMS) +============================================ + +To use Queryable Encryption, you must configure a Key Management Service (KMS) +to store and manage your encryption keys. Django MongoDB Backend allows you to +configure multiple KMS providers and select the appropriate provider for each +model using a custom database router. + +The KMS is responsible for managing the encryption keys used to encrypt and +decrypt data. 
The following table summarizes the available KMS configuration +options followed by an example of how to use them. + ++-------------------------------------------------------------------------+--------------------------------------------------------+ +| :setting:`KMS_CREDENTIALS ` | A dictionary of Key Management Service (KMS) | +| | credentials configured in the | +| | :setting:`django:DATABASES` setting. | ++-------------------------------------------------------------------------+--------------------------------------------------------+ +| :class:`kms_providers ` | A dictionary of KMS provider credentials used to | +| | access the KMS with ``kms_provider``. | ++-------------------------------------------------------------------------+--------------------------------------------------------+ +| :ref:`kms_provider ` | A single KMS provider name | +| | configured in your custom database | +| | router. | ++-------------------------------------------------------------------------+--------------------------------------------------------+ + +Example of KMS configuration with ``aws`` in your :class:`kms_providers +` setting:: + + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "encrypted": { + # ... + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + # ... + kms_providers={ + "aws": { + "accessKeyId": "your-access-key-id", + "secretAccessKey": "your-secret-access-key", + }, + }, + ), + }, + "KMS_CREDENTIALS": { + "aws": { + "key": os.getenv("AWS_KEY_ARN", ""), + "region": os.getenv("AWS_KEY_REGION", ""), + }, + }, + }, + } + +(TODO: If there's a use case for multiple providers, motivate with a use case +and add a test.) + +If you've configured multiple KMS providers, you must define logic to determine +the provider for each model in your :ref:`database router +`:: + + class EncryptedRouter: + # ... + def kms_provider(self, model, **hints): + return "aws" + +.. 
_qe-configuring-encrypted-fields-map: + +Configuring the ``encrypted_fields_map`` option +=============================================== + +When you configure the :ref:`DATABASES ` +setting for Queryable Encryption *without* specifying an +``encrypted_fields_map``, Django MongoDB Backend will create encrypted +collections, including encryption keys, when you :ref:`run migrations for models +that have encrypted fields `. + +Encryption keys for encrypted fields are stored in the key vault specified in +the :ref:`DATABASES ` setting. To see the keys created by +Django MongoDB Backend, along with the entire schema, you can run the +:djadmin:`showencryptedfieldsmap` command:: + + $ python manage.py showencryptedfieldsmap --database encrypted + +Use the output of :djadmin:`showencryptedfieldsmap` to set the +``encrypted_fields_map`` in :class:`AutoEncryptionOpts +` in your Django settings:: + + from bson import json_util + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "encrypted": { + # ... + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + # ... + encrypted_fields_map=json_util.loads( + """{ + "encrypt_patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "keyId": { + "$binary": { + "base64": "2MA29LaARIOqymYHGmi2mQ==", + "subType": "04" + } + }, + "queries": { + "queryType": "equality" + } + } + ] + }}""" + ), + ), + }, + }, + } + +.. admonition:: Security consideration + + Supplying an encrypted fields map provides more security than relying on an + encrypted fields map obtained from the server. It protects against a + malicious server advertising a false encrypted fields map. + +Configuring the Automatic Encryption Shared Library +=================================================== + +The :ref:`manual:qe-reference-shared-library` is a preferred alternative to +:ref:`manual:qe-mongocryptd` and does not require you to start another process +to perform automatic encryption. 
+ +In practice, if you use Atlas or Enterprise MongoDB, ``mongocryptd`` is already +configured for you; however, in such cases the shared library is still +recommended for use with Queryable Encryption. + +You can :ref:`download the shared library +` from the +:ref:`manual:enterprise-official-packages` and configure it in your Django +settings using the ``crypt_shared_lib_path`` option in +:class:`AutoEncryptionOpts `. + +The following example shows how to configure the shared library in your Django +settings:: + + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "encrypted": { + # ... + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + # ... + crypt_shared_lib_path="/path/to/mongo_crypt_shared_v1.dylib", + ) + }, + # ... + }, + } + + +.. admonition:: Dynamic library path configuration + + The Automatic Encryption Shared Library is platform-specific. Make sure to + download the correct version for your operating system and architecture, + and configure your environment so the system can locate it. + + Use the following variables depending on your platform: + + +---------------+---------------------------------+ + | **Platform** | **Environment Variable** | + +---------------+---------------------------------+ + | Windows | ``PATH`` | + +---------------+---------------------------------+ + | macOS | ``DYLD_FALLBACK_LIBRARY_PATH`` | + +---------------+---------------------------------+ + | Linux | ``LD_LIBRARY_PATH`` | + +---------------+---------------------------------+ + + For example, on macOS you can add the directory that contains the shared + library to the ``DYLD_FALLBACK_LIBRARY_PATH`` environment variable in your + shell before starting your Django application:: + + $ export DYLD_FALLBACK_LIBRARY_PATH="/path/to:$DYLD_FALLBACK_LIBRARY_PATH" + +You are now ready to :doc:`start developing applications +` with Queryable Encryption! 
diff --git a/docs/index.rst b/docs/index.rst index a5a60e84f..5f5e93108 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,10 +46,12 @@ Models - :doc:`ref/database` - :doc:`ref/contrib/gis` - :doc:`ref/django-admin` +- :doc:`ref/models/encrypted-fields` **Topic guides:** - :doc:`topics/embedded-models` +- :doc:`topics/queryable-encryption` - :doc:`topics/transactions` Forms diff --git a/docs/ref/django-admin.rst b/docs/ref/django-admin.rst index a491714cf..1e111eee8 100644 --- a/docs/ref/django-admin.rst +++ b/docs/ref/django-admin.rst @@ -13,3 +13,26 @@ in the :setting:`INSTALLED_APPS` setting. Available commands ================== + +``showencryptedfieldsmap`` +-------------------------- + +.. versionadded:: 5.2.3 + +.. django-admin:: showencryptedfieldsmap + + This command shows the mapping of encrypted fields to attributes including + data type, data keys and query types. Its output can be used to set the + :ref:`encrypted_fields_map ` argument + in :class:`AutoEncryptionOpts + `. + + .. django-admin-option:: --database DATABASE + + Specifies the database to use. Defaults to ``default``. + + To show the encrypted fields map for a database named ``encrypted``, run: + + .. code-block:: console + + $ python manage.py showencryptedfieldsmap --database encrypted diff --git a/docs/ref/index.rst b/docs/ref/index.rst index 94a11a2a8..47b27d466 100644 --- a/docs/ref/index.rst +++ b/docs/ref/index.rst @@ -9,5 +9,7 @@ API reference forms contrib/index database + models/encrypted-fields django-admin utils + settings diff --git a/docs/ref/models/encrypted-fields.rst b/docs/ref/models/encrypted-fields.rst new file mode 100644 index 000000000..f09dfa168 --- /dev/null +++ b/docs/ref/models/encrypted-fields.rst @@ -0,0 +1,108 @@ +================ +Encrypted fields +================ + +.. versionadded:: 5.2.3 + +Django MongoDB Backend supports :doc:`manual:core/queryable-encryption`. 
+ +See :doc:`/howto/queryable-encryption` for more information on how to use +Queryable Encryption with Django MongoDB Backend. + +See the :doc:`/topics/queryable-encryption` topic guide for +more information on developing applications with Queryable Encryption. + +The following tables detail which fields have encrypted counterparts. In all +cases, the encrypted field names are simply prefixed with ``Encrypted``, e.g. +``EncryptedCharField``. They are importable from +``django_mongodb_backend.fields``. + +.. csv-table:: ``django.db.models`` + :header: "Model Field", "Encrypted version available?" + + :class:`~django.db.models.BigIntegerField`, Yes + :class:`~django.db.models.BinaryField`, Yes + :class:`~django.db.models.BooleanField`, Yes + :class:`~django.db.models.CharField`, Yes + :class:`~django.db.models.DateField`, Yes + :class:`~django.db.models.DateTimeField`, Yes + :class:`~django.db.models.DecimalField`, Yes + :class:`~django.db.models.DurationField`, Yes + :class:`~django.db.models.EmailField`, Yes + :class:`~django.db.models.FileField`, No: the use case for encrypting this field is unclear. + :class:`~django.db.models.FilePathField`, No: the use case for encrypting this field is unclear. + :class:`~django.db.models.FloatField`, Yes + :class:`~django.db.models.GenericIPAddressField`, Yes + :class:`~django.db.models.ImageField`, No: the use case for encrypting this field is unclear. + :class:`~django.db.models.IntegerField`, Yes + :class:`~django.db.models.JSONField`, No: ``JSONField`` isn't recommended. + :class:`~django.db.models.PositiveIntegerField`, Yes + :class:`~django.db.models.PositiveBigIntegerField`, Yes + :class:`~django.db.models.PositiveSmallIntegerField`, Yes + :class:`~django.db.models.SlugField`, No: it requires a unique index which Queryable Encryption doesn't support. 
+ :class:`~django.db.models.SmallIntegerField`, Yes + :class:`~django.db.models.TimeField`, Yes + :class:`~django.db.models.TextField`, Yes + :class:`~django.db.models.URLField`, Yes + :class:`~django.db.models.UUIDField`, Yes + +.. csv-table:: ``django_mongodb_backend.fields`` + :header: "Model Field", "Encrypted version available?" + + :class:`~.fields.ArrayField`, Yes + :class:`~.fields.EmbeddedModelArrayField`, Yes + :class:`~.fields.EmbeddedModelField`, Yes + :class:`~.fields.ObjectIdField`, Yes + :class:`~.fields.PolymorphicEmbeddedModelField`, No: may be implemented in the future. + :class:`~.fields.PolymorphicEmbeddedModelArrayField`, No: may be implemented in the future. + +These fields don't support the ``queries`` argument: + +- ``EncryptedArrayField`` +- ``EncryptedEmbeddedModelArrayField`` +- ``EncryptedEmbeddedModelField`` + +Limitations +=========== + +MongoDB imposes some restrictions on encrypted fields: + +* They cannot be indexed. +* They cannot be part of a unique constraint. +* They cannot be null. + +``EncryptedFieldMixin`` +======================= + +.. class:: EncryptedFieldMixin + + .. versionadded:: 5.2.3 + + A mixin that can be used to create custom encrypted fields with Queryable + Encryption. + + To create an encrypted field, inherit from ``EncryptedFieldMixin`` and + your custom field class: + + .. code-block:: python + + from django.db import models + from django_mongodb_backend.fields import EncryptedFieldMixin + from myapp.fields import MyField + + + class MyEncryptedField(EncryptedFieldMixin, MyField): + pass + + + You can then use your custom encrypted field in a model, specifying the + desired query types: + + .. 
code-block:: python + + class MyModel(models.Model): + my_encrypted_field = MyEncryptedField( + queries={"queryType": "equality"}, + ) + my_encrypted_field_too = MyEncryptedField( + queries={"queryType": "range"}, + ) diff --git a/docs/ref/settings.rst b/docs/ref/settings.rst new file mode 100644 index 000000000..233515262 --- /dev/null +++ b/docs/ref/settings.rst @@ -0,0 +1,45 @@ +======== +Settings +======== + +.. _queryable-encryption-settings: + +Queryable Encryption +==================== + +The following :setting:`django:DATABASES` inner options support configuration of +Key Management Service (KMS) credentials for Queryable Encryption. + +.. setting:: DATABASE-KMS-CREDENTIALS + +``KMS_CREDENTIALS`` +------------------- + +Default: ``{}`` (empty dictionary) + +A dictionary of Key Management Service (KMS) credential key-value pairs. These +credentials are required to access your KMS provider (such as AWS KMS, Azure Key +Vault, or GCP KMS) for encrypting and decrypting data using Queryable +Encryption. + +For example after :doc:`/howto/queryable-encryption`, to configure AWS KMS, +Azure Key Vault, or GCP KMS credentials, you can set ``KMS_CREDENTIALS`` in +your :setting:`django:DATABASES` settings as follows: + +.. code-block:: python + + DATABASES["encrypted"]["KMS_CREDENTIALS"] = { + "aws": { + "key": os.getenv("AWS_KEY_ARN", ""), + "region": os.getenv("AWS_KEY_REGION", ""), + }, + "azure": { + "key": os.getenv("AZURE_KEY_VAULT_URL", ""), + "client_id": os.getenv("AZURE_CLIENT_ID", ""), + "client_secret": os.getenv("AZURE_CLIENT_SECRET", ""), + }, + "gcp": { + "key": os.getenv("GCP_KEY_NAME", ""), + "project_id": os.getenv("GCP_PROJECT_ID", ""), + }, + } diff --git a/docs/ref/utils.rst b/docs/ref/utils.rst index 5cdb0ccf3..0312c1f89 100644 --- a/docs/ref/utils.rst +++ b/docs/ref/utils.rst @@ -48,3 +48,37 @@ following parts can be considered stable. 
But for maximum flexibility, construct :setting:`DATABASES` manually as described in :ref:`configuring-databases-setting`. + +``model_has_encrypted_fields()`` +================================= + +.. function:: model_has_encrypted_fields(model) + + .. versionadded:: 5.2.3 + + Returns ``True`` if the given Django model, or any model embedded in it, + has encrypted fields. + + Example usage in a :ref:`database router + `:: + + from django_mongodb_backend.utils import model_has_encrypted_fields + + class EncryptedRouter: + def db_for_read(self, model, **hints): + if model_has_encrypted_fields(model): + return "encrypted" + return "default" + + def db_for_write(self, model, **hints): + if model_has_encrypted_fields(model): + return "encrypted" + return "default" + + def allow_migrate(self, db, app_label, model_name=None, **hints): + if hints.get("model"): + if model_has_encrypted_fields(hints["model"]): + return db == "encrypted" + else: + return db == "default" + return None diff --git a/docs/releases/5.2.x.rst b/docs/releases/5.2.x.rst index bed6a6cac..1a0828c81 100644 --- a/docs/releases/5.2.x.rst +++ b/docs/releases/5.2.x.rst @@ -124,6 +124,7 @@ New features :class:`~.fields.PolymorphicEmbeddedModelArrayField` for storing a model instance or list of model instances that may be of more than one model class. - Added :doc:`GeoDjango support `. +- Added :doc:`Queryable Encryption support `. 
Backwards incompatible changes ------------------------------ diff --git a/docs/topics/index.rst b/docs/topics/index.rst index 6e06b8125..a02b35239 100644 --- a/docs/topics/index.rst +++ b/docs/topics/index.rst @@ -9,5 +9,6 @@ know: :maxdepth: 2 embedded-models + queryable-encryption transactions known-issues diff --git a/docs/topics/known-issues.rst b/docs/topics/known-issues.rst index 96cd6dec1..2d17b352a 100644 --- a/docs/topics/known-issues.rst +++ b/docs/topics/known-issues.rst @@ -26,6 +26,8 @@ Model fields - :class:`~django.db.models.CompositePrimaryKey` - :class:`~django.db.models.GeneratedField` +.. _known-issues-limitations-querying: + Querying ======== diff --git a/docs/topics/queryable-encryption.rst b/docs/topics/queryable-encryption.rst new file mode 100644 index 000000000..0d6372f69 --- /dev/null +++ b/docs/topics/queryable-encryption.rst @@ -0,0 +1,140 @@ +==================== +Queryable Encryption +==================== + +.. versionadded:: 5.2.3 + +Once you have successfully set up MongoDB Queryable Encryption as described in +:doc:`the installation guide `, you can start +using encrypted fields in your Django models. + +Encrypted fields +================ + +The basics +---------- + +:doc:`Encrypted fields ` may be used to protect +sensitive data like social security numbers, credit card information, or +personal health information. With Queryable Encryption, you can also perform +queries on certain encrypted fields. To use encrypted fields in your models, +import the necessary field types from ``django_mongodb_backend.fields`` and +define your models as usual. 
+ +Here are models based on the `Python Queryable Encryption Tutorial`_:: + + # myapp/models.py + from django.db import models + from django_mongodb_backend.models import EmbeddedModel + from django_mongodb_backend.fields import ( + EmbeddedModelField, + EncryptedCharField, + EncryptedEmbeddedModelField, + ) + + + class Patient(models.Model): + patient_name = models.CharField(max_length=255) + patient_id = models.BigIntegerField() + patient_record = EmbeddedModelField("PatientRecord") + + def __str__(self): + return f"{self.patient_name} ({self.patient_id})" + + + class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11) + billing = EncryptedEmbeddedModelField("Billing") + bill_amount = models.DecimalField(max_digits=10, decimal_places=2) + + + class Billing(EmbeddedModel): + cc_type = models.CharField(max_length=50) + cc_number = models.CharField(max_length=20) + +.. _Python Queryable Encryption Tutorial: https://github.com/mongodb/docs/tree/main/content/manual/manual/source/includes/qe-tutorials/python + +.. _qe-migrations: + +Migrations +---------- + +Once you have defined your models, create migrations with: + +.. code-block:: console + + $ python manage.py makemigrations + +Then run the migrations with: + +.. code-block:: console + + $ python manage.py migrate --database encrypted + +Now create and manipulate instances of the data just like any other Django +model data. The fields will automatically handle encryption and decryption, +ensuring that :ref:`sensitive data is stored securely in the database +`. + +Routers +------- + +The example above requires a :ref:`database router +` to direct operations on models with +encrypted fields to the appropriate database. It also requires the use of a +:ref:`router for embedded models `. 
Here +is an example that includes both:: + + # myproject/settings.py + DATABASE_ROUTERS = [ + "django_mongodb_backend.routers.MongoRouter", + "myproject.routers.EncryptedRouter", + ] + +Querying encrypted fields +------------------------- + +In order to query encrypted fields, you must define the queryable encryption +query type in the model field definition. For example, if you want to query the +``ssn`` field for equality, you can define it as follows:: + + class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11, queries={"queryType": "equality"}) + billing = EncryptedEmbeddedModelField("Billing") + bill_amount = models.DecimalField(max_digits=10, decimal_places=2) + +Then you can perform a query like this: + +.. code-block:: console + + >>> patient = Patient.objects.get(patient_record__ssn="123-45-6789") + >>> patient.patient_name + 'John Doe' + +.. _qe-available-query-types: + +Available query types +~~~~~~~~~~~~~~~~~~~~~ + +The ``queries`` option should be a dictionary that specifies the type of queries +that can be performed on the field. Of the :ref:`available query types +` Django MongoDB Backend currently +supports: + +- ``equality`` +- ``range`` + +.. admonition:: Query types vs. Django lookups + + Range queries in Queryable Encryption are different from Django's + :ref:`range lookups `. Range queries allow you to + perform comparisons on encrypted fields, while Django's range lookups are + used for filtering based on a range of values. + +QuerySet limitations +~~~~~~~~~~~~~~~~~~~~ + +In addition to :ref:`Django MongoDB Backend's QuerySet limitations +`, + +.. 
TODO diff --git a/pyproject.toml b/pyproject.toml index 0549f02ef..b4f4841ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ docs = [ "furo>=2025.7.19", "sphinx-copybutton", ] +encryption = ["pymongo[encryption]"] [project.urls] Homepage = "https://www.mongodb.org" diff --git a/tests/backend_/test_features.py b/tests/backend_/test_features.py index 05959fa70..d505c7fab 100644 --- a/tests/backend_/test_features.py +++ b/tests/backend_/test_features.py @@ -44,3 +44,83 @@ def mocked_command(command): with patch("pymongo.synchronous.database.Database.command", wraps=mocked_command): self.assertIs(connection.features._supports_transactions, False) + + +class SupportsQueryableEncryptionTests(TestCase): + def setUp(self): + # Clear the cached property. + connection.features.__dict__.pop("supports_queryable_encryption", None) + # Must initialize the feature before patching it. + connection.features._supports_transactions # noqa: B018 + + def tearDown(self): + del connection.features.supports_queryable_encryption + + @staticmethod + def enterprise_response(command): + if command == "buildInfo": + return {"modules": ["enterprise"]} + raise Exception("Unexpected command") + + @staticmethod + def non_enterprise_response(command): + if command == "buildInfo": + return {"modules": []} + raise Exception("Unexpected command") + + def test_supported_on_atlas(self): + """Supported on MongoDB 8.0+ Atlas replica set or sharded cluster.""" + with ( + patch( + "pymongo.synchronous.database.Database.command", wraps=self.non_enterprise_response + ), + patch("django.db.connection.features.supports_atlas_search", True), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, True) + + def test_supported_on_enterprise(self): + """Supported on MongoDB 8.0+ Enterprise replica set or sharded cluster.""" + with ( + 
patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, True) + + def test_atlas_or_enterprise_required(self): + """Not supported on MongoDB Community Edition.""" + with ( + patch( + "pymongo.synchronous.database.Database.command", wraps=self.non_enterprise_response + ), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) + + def test_transactions_required(self): + """ + Not supported if database isn't a replica set or sharded cluster + (i.e. DatabaseFeatures._supports_transactions = False). 
+ """ + with ( + patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", False), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) + + def test_mongodb_8_0_required(self): + """Not supported on MongoDB < 8.0""" + with ( + patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", False), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) diff --git a/tests/encryption_/__init__.py b/tests/encryption_/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/encryption_/models.py b/tests/encryption_/models.py new file mode 100644 index 000000000..995e4760c --- /dev/null +++ b/tests/encryption_/models.py @@ -0,0 +1,184 @@ +from django.db import models + +from django_mongodb_backend.fields import ( + EmbeddedModelField, + EncryptedArrayField, + EncryptedBigIntegerField, + EncryptedBinaryField, + EncryptedBooleanField, + EncryptedCharField, + EncryptedDateField, + EncryptedDateTimeField, + EncryptedDecimalField, + EncryptedDurationField, + EncryptedEmailField, + EncryptedEmbeddedModelArrayField, + EncryptedEmbeddedModelField, + EncryptedFloatField, + EncryptedGenericIPAddressField, + EncryptedIntegerField, + EncryptedObjectIdField, + EncryptedPositiveBigIntegerField, + EncryptedPositiveIntegerField, + EncryptedPositiveSmallIntegerField, + EncryptedSmallIntegerField, + EncryptedTextField, + EncryptedTimeField, + EncryptedURLField, + EncryptedUUIDField, +) +from django_mongodb_backend.models import EmbeddedModel + + +class Author(models.Model): + 
name = models.CharField(max_length=255) + + +class Book(models.Model): + title = models.CharField(max_length=255) + author = models.ForeignKey(Author, models.CASCADE) + + +class EncryptedTestModel(models.Model): + class Meta: + abstract = True + required_db_features = {"supports_queryable_encryption"} + + +# Array models +class ArrayModel(EncryptedTestModel): + values = EncryptedArrayField( + models.IntegerField(), + size=5, + ) + + +# Embedded models +class Patient(EncryptedTestModel): + patient_name = models.CharField(max_length=255) + patient_id = models.BigIntegerField() + patient_record = EmbeddedModelField("PatientRecord") + + def __str__(self): + return f"{self.patient_name} ({self.patient_id})" + + +class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11, queries={"queryType": "equality"}) + billing = EncryptedEmbeddedModelField("Billing") + bill_amount = models.DecimalField(max_digits=10, decimal_places=2) + + +class Billing(EmbeddedModel): + cc_type = models.CharField(max_length=50) + cc_number = models.CharField(max_length=20) + + +# Embedded array models +class Actor(EmbeddedModel): + name = models.CharField(max_length=100) + + +class Movie(EncryptedTestModel): + title = models.CharField(max_length=200) + plot = models.TextField(blank=True) + runtime = models.IntegerField(default=0) + released = models.DateTimeField("release date") + cast = EncryptedEmbeddedModelArrayField(Actor) + + def __str__(self): + return self.title + + +# Equality-queryable field models +class BinaryModel(EncryptedTestModel): + value = EncryptedBinaryField(queries={"queryType": "equality"}) + + +class BooleanModel(EncryptedTestModel): + value = EncryptedBooleanField(queries={"queryType": "equality"}) + + +class CharModel(EncryptedTestModel): + value = EncryptedCharField(max_length=255, queries={"queryType": "equality"}) + + +class EmailModel(EncryptedTestModel): + value = EncryptedEmailField(max_length=255, queries={"queryType": "equality"}) + + +class 
GenericIPAddressModel(EncryptedTestModel): + value = EncryptedGenericIPAddressField(queries={"queryType": "equality"}) + + +class ObjectIdModel(EncryptedTestModel): + value = EncryptedObjectIdField(queries={"queryType": "equality"}) + + +class TextModel(EncryptedTestModel): + value = EncryptedTextField(queries={"queryType": "equality"}) + + +class URLModel(EncryptedTestModel): + value = EncryptedURLField(max_length=500, queries={"queryType": "equality"}) + + +class UUIDModel(EncryptedTestModel): + value = EncryptedUUIDField(queries={"queryType": "equality"}) + + +# Range-queryable field models +class BigIntegerModel(EncryptedTestModel): + value = EncryptedBigIntegerField(queries={"queryType": "range"}) + + +class DateModel(EncryptedTestModel): + value = EncryptedDateField(queries={"queryType": "range"}) + + +class DateTimeModel(EncryptedTestModel): + value = EncryptedDateTimeField(queries={"queryType": "range"}) + + +class DecimalModel(EncryptedTestModel): + value = EncryptedDecimalField(max_digits=10, decimal_places=2, queries={"queryType": "range"}) + + +class DurationModel(EncryptedTestModel): + value = EncryptedDurationField(queries={"queryType": "range"}) + + +class FloatModel(EncryptedTestModel): + value = EncryptedFloatField(queries={"queryType": "range"}) + + +class IntegerModel(EncryptedTestModel): + value = EncryptedIntegerField(queries={"queryType": "range"}) + + +class PositiveBigIntegerModel(EncryptedTestModel): + value = EncryptedPositiveBigIntegerField(queries={"queryType": "range"}) + + +class PositiveIntegerModel(EncryptedTestModel): + value = EncryptedPositiveIntegerField(queries={"queryType": "range"}) + + +class PositiveSmallIntegerModel(EncryptedTestModel): + value = EncryptedPositiveSmallIntegerField(queries={"queryType": "range"}) + + +class SmallIntegerModel(EncryptedTestModel): + value = EncryptedSmallIntegerField(queries={"queryType": "range"}) + + +class TimeModel(EncryptedTestModel): + value = EncryptedTimeField(queries={"queryType": 
"range"}) + + +class EncryptionKey(models.Model): + key_alt_name = models.CharField(max_length=500, db_column="keyAltNames") + + class Meta: + db_table = "__keyVault" + managed = False diff --git a/tests/encryption_/test_base.py b/tests/encryption_/test_base.py new file mode 100644 index 000000000..0c165d19a --- /dev/null +++ b/tests/encryption_/test_base.py @@ -0,0 +1,21 @@ +import pymongo +from bson.binary import Binary +from django.conf import settings +from django.db import connections +from django.test import TestCase, skipUnlessDBFeature + + +@skipUnlessDBFeature("supports_queryable_encryption") +class EncryptionTestCase(TestCase): + databases = {"default", "encrypted"} + maxDiff = None + + def assertEncrypted(self, model, field): + # Access encrypted database from an unencrypted connection + conn_params = connections["default"].get_connection_params() + db_name = settings.DATABASES["encrypted"]["NAME"] + with pymongo.MongoClient(**conn_params) as new_connection: + db = new_connection[db_name] + collection = db[model._meta.db_table] + data = collection.find_one({}, {field: 1, "_id": 0}) + self.assertIsInstance(data[field], Binary) diff --git a/tests/encryption_/test_fields.py b/tests/encryption_/test_fields.py new file mode 100644 index 000000000..ad3fb9513 --- /dev/null +++ b/tests/encryption_/test_fields.py @@ -0,0 +1,404 @@ +import datetime +import uuid +from decimal import Decimal +from operator import attrgetter + +from bson import ObjectId +from django.db import DatabaseError +from django.db.models import Avg + +from django_mongodb_backend.fields import ( + EncryptedArrayField, + EncryptedCharField, + EncryptedEmbeddedModelArrayField, + EncryptedEmbeddedModelField, + EncryptedIntegerField, +) + +from .models import ( + Actor, + ArrayModel, + BigIntegerModel, + Billing, + BinaryModel, + Book, + BooleanModel, + CharModel, + DateModel, + DateTimeModel, + DecimalModel, + DurationModel, + EmailModel, + FloatModel, + GenericIPAddressModel, + IntegerModel, + 
Movie, + ObjectIdModel, + Patient, + PatientRecord, + PositiveBigIntegerModel, + PositiveIntegerModel, + PositiveSmallIntegerModel, + SmallIntegerModel, + TextModel, + TimeModel, + URLModel, + UUIDModel, +) +from .test_base import EncryptionTestCase + + +class ArrayModelTests(EncryptionTestCase): + def setUp(self): + self.array_model = ArrayModel.objects.create(values=[1, 2, 3, 4, 5]) + + def test_array(self): + array_model = ArrayModel.objects.get(id=self.array_model.id) + self.assertEqual(array_model.values, [1, 2, 3, 4, 5]) + self.assertEncrypted(self.array_model, "values") + + +class EmbeddedModelTests(EncryptionTestCase): + def setUp(self): + self.billing = Billing(cc_type="Visa", cc_number="4111111111111111") + self.patient_record = PatientRecord(ssn="123-45-6789", billing=self.billing) + self.patient = Patient.objects.create( + patient_name="John Doe", patient_id=123456789, patient_record=self.patient_record + ) + + def test_object(self): + patient = Patient.objects.get(id=self.patient.id) + self.assertEqual(patient.patient_record.ssn, "123-45-6789") + self.assertEqual(patient.patient_record.billing.cc_type, "Visa") + self.assertEqual(patient.patient_record.billing.cc_number, "4111111111111111") + + +class EmbeddedModelArrayTests(EncryptionTestCase): + def setUp(self): + self.actor1 = Actor(name="Actor One") + self.actor2 = Actor(name="Actor Two") + self.movie = Movie.objects.create( + title="Sample Movie", + cast=[self.actor1, self.actor2], + released=datetime.date(2024, 6, 1), + ) + + def test_array(self): + movie = Movie.objects.get(id=self.movie.id) + self.assertEqual(len(movie.cast), 2) + self.assertEqual(movie.cast[0].name, "Actor One") + self.assertEqual(movie.cast[1].name, "Actor Two") + self.assertEncrypted(movie, "cast") + + +class FieldTests(EncryptionTestCase): + def assertEquality(self, model_cls, val): + model_cls.objects.create(value=val) + fetched = model_cls.objects.get(value=val) + self.assertEqual(fetched.value, val) + + def 
assertRange(self, model_cls, *, low, high, threshold): + model_cls.objects.create(value=low) + model_cls.objects.create(value=high) + self.assertEqual(model_cls.objects.get(value=low).value, low) + self.assertEqual(model_cls.objects.get(value=high).value, high) + objs = list(model_cls.objects.filter(value__gt=threshold)) + self.assertEqual(len(objs), 1) + self.assertEqual(objs[0].value, high) + + # Equality-only fields + def test_binary(self): + self.assertEquality(BinaryModel, b"\x00\x01\x02") + self.assertEncrypted(BinaryModel, "value") + + def test_boolean(self): + self.assertEquality(BooleanModel, True) + self.assertEncrypted(BooleanModel, "value") + + def test_char(self): + self.assertEquality(CharModel, "hello") + self.assertEncrypted(CharModel, "value") + + def test_email(self): + self.assertEquality(EmailModel, "test@example.com") + self.assertEncrypted(EmailModel, "value") + + def test_ip(self): + self.assertEquality(GenericIPAddressModel, "192.168.0.1") + self.assertEncrypted(GenericIPAddressModel, "value") + + def test_objectid(self): + self.assertEquality(ObjectIdModel, ObjectId()) + self.assertEncrypted(ObjectIdModel, "value") + + def test_text(self): + self.assertEquality(TextModel, "some text") + self.assertEncrypted(TextModel, "value") + + def test_url(self): + self.assertEquality(URLModel, "https://example.com") + self.assertEncrypted(URLModel, "value") + + def test_uuid(self): + self.assertEquality(UUIDModel, uuid.uuid4()) + self.assertEncrypted(UUIDModel, "value") + + # Range fields + def test_big_integer(self): + self.assertRange(BigIntegerModel, low=100, high=200, threshold=150) + self.assertEncrypted(BigIntegerModel, "value") + + def test_date(self): + self.assertRange( + DateModel, + low=datetime.date(2024, 6, 1), + high=datetime.date(2024, 6, 10), + threshold=datetime.date(2024, 6, 5), + ) + self.assertEncrypted(DateModel, "value") + + def test_datetime(self): + self.assertRange( + DateTimeModel, + low=datetime.datetime(2024, 6, 1, 12, 0), + 
high=datetime.datetime(2024, 6, 2, 12, 0), + threshold=datetime.datetime(2024, 6, 2, 0, 0), + ) + self.assertEncrypted(DateTimeModel, "value") + + def test_decimal(self): + self.assertRange( + DecimalModel, + low=Decimal("123.45"), + high=Decimal("200.50"), + threshold=Decimal("150"), + ) + self.assertEncrypted(DecimalModel, "value") + + def test_duration(self): + self.assertRange( + DurationModel, + low=datetime.timedelta(days=3), + high=datetime.timedelta(days=10), + threshold=datetime.timedelta(days=5), + ) + self.assertEncrypted(DurationModel, "value") + + def test_float(self): + self.assertRange(FloatModel, low=1.23, high=4.56, threshold=3.0) + self.assertEncrypted(FloatModel, "value") + + def test_integer(self): + self.assertRange(IntegerModel, low=5, high=10, threshold=7) + self.assertEncrypted(IntegerModel, "value") + + def test_positive_big_integer(self): + self.assertRange(PositiveBigIntegerModel, low=100, high=500, threshold=200) + self.assertEncrypted(PositiveBigIntegerModel, "value") + + def test_positive_integer(self): + self.assertRange(PositiveIntegerModel, low=10, high=20, threshold=15) + self.assertEncrypted(PositiveIntegerModel, "value") + + def test_positive_small_integer(self): + self.assertRange(PositiveSmallIntegerModel, low=5, high=8, threshold=6) + self.assertEncrypted(PositiveSmallIntegerModel, "value") + + def test_small_integer(self): + self.assertRange(SmallIntegerModel, low=-5, high=2, threshold=0) + self.assertEncrypted(SmallIntegerModel, "value") + + def test_time(self): + self.assertRange( + TimeModel, + low=datetime.time(10, 0), + high=datetime.time(15, 0), + threshold=datetime.time(12, 0), + ) + self.assertEncrypted(TimeModel, "value") + + +class QueryTests(EncryptionTestCase): + def test_aggregate(self): + msg = ( + "Aggregation stage $internalFacetTeeConsumer is not allowed or " + "supported with automatic encryption." 
+ ) + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.aggregate(Avg("value"))) + + def test_alias(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.alias(avg=Avg("value"))) + + def test_annotate(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.annotate(avg=Avg("value"))) + + def test_bulk_create(self): + CharModel.objects.bulk_create([CharModel(value="abc"), CharModel(value="xyz")]) + self.assertQuerySetEqual( + CharModel.objects.order_by("pk"), ["abc", "xyz"], attrgetter("value") + ) + + def test_bulk_update(self): + objs = [ + CharModel.objects.create(value="abc"), + CharModel.objects.create(value="xyz"), + ] + objs[0].value = "def" + objs[1].value = "mno" + msg = "Multi-document updates are not allowed with Queryable Encryption" + with self.assertRaisesMessage(DatabaseError, msg): + CharModel.objects.bulk_update(objs, ["value"]) + + def test_contains(self): + obj = CharModel.objects.create(value="abc") + self.assertIs(CharModel.objects.contains(obj), True) + + def test_count(self): + msg = ( + "Aggregation stage $internalFacetTeeConsumer is not allowed or " + "supported with automatic encryption." + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(CharModel.objects.count()) + + def test_dates(self): + msg = ( + "If the value type is a date, the type of the index must also be date (and vice versa)." 
+ ) + with self.assertRaisesMessage(DatabaseError, msg): + list(DateModel.objects.dates("value", "year")) + + def test_datetimes(self): + msg = ( + "If the value type is a date, the type of the index must also be date (and vice versa)." + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(DateTimeModel.objects.datetimes("value", "year")) + + def test_distinct(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(CharModel.objects.distinct("value")) + + def test_exclude(self): + obj1 = CharModel.objects.create(value="abc") + obj2 = CharModel.objects.create(value="xyz") + self.assertSequenceEqual(CharModel.objects.exclude(value=obj1.value), [obj2]) + + def test_exists(self): + self.assertIs(CharModel.objects.exists(), False) + + def test_get_or_create(self): + obj1, created1 = CharModel.objects.get_or_create(value="abc") + self.assertIs(created1, True) + obj2, created2 = CharModel.objects.get_or_create(value="abc") + self.assertIs(created2, False) + self.assertEqual(obj1, obj2) + + def test_join(self): + list(Book.objects.filter(author__name="xxx")) + + def test_order_by(self): + msg = "Cannot add an encrypted field as a prefix of another encrypted field" + with self.assertRaisesMessage(DatabaseError, msg): + list(CharModel.objects.order_by("value")) + + def test_select_related(self): + list(Book.objects.select_related("author")) + + def test_update(self): + msg = "Multi-document updates are not allowed with Queryable Encryption" + with self.assertRaisesMessage(DatabaseError, msg): + self.assertEqual(CharModel.objects.update(value="xyz"), 1) + + def test_update_or_create(self): + # TODO + # msg = "Multi-document updates are not allowed with Queryable Encryption" + # with self.assertRaisesMessage(DatabaseError, msg): + # obj, created = 
CharModel.objects.update_or_create(value="xyz"), 1) + pass + + def test_union(self): + msg = "Aggregation stage $unionWith is not allowed or supported with automatic encryption." + qs1 = IntegerModel.objects.filter(value__gt=1) + qs2 = IntegerModel.objects.filter(value__gte=8) + with self.assertRaisesMessage(DatabaseError, msg): + list(qs1.union(qs2)) + + def test_values(self): + list(CharModel.objects.values("value")) + + def test_values_list(self): + list(CharModel.objects.values_list("value")) + + +class FieldMixinTests(EncryptionTestCase): + def test_db_index(self): + msg = "'db_index=True' is not supported on encrypted fields." + with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(db_index=True) + + def test_null(self): + msg = "'null=True' is not supported on encrypted fields." + with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(null=True) + + def test_unique(self): + msg = "'unique=True' is not supported on encrypted fields." + with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(unique=True) + + def test_deconstruct_preserves_queries_and_rewrites_path(self): + field = EncryptedCharField(max_length=50, queries={"field": "value"}) + field.name = "ssn" + name, path, args, kwargs = field.deconstruct() + + # Name is preserved + self.assertEqual(name, "ssn") + + # Path is rewritten from 'encrypted_model' to regular fields path + self.assertEqual(path, "django_mongodb_backend.fields.EncryptedCharField") + + # No positional args for CharField + self.assertEqual(args, []) + + # Queries value is preserved in kwargs + self.assertIn("queries", kwargs) + self.assertEqual(kwargs["queries"], {"field": "value"}) + + # Reconstruct from deconstruct output + new_field = EncryptedCharField(*args, **kwargs) + + # Reconstructed field is equivalent + self.assertEqual(new_field.queries, field.queries) + self.assertIsNot(new_field, field) + self.assertEqual(new_field.max_length, field.max_length) + + def 
test_fields_without_queries(self): + """Some field types (array, object) can't be queried.""" + for field in ( + EncryptedArrayField, + EncryptedEmbeddedModelField, + EncryptedEmbeddedModelArrayField, + ): + with self.subTest(field=field): + msg = f"{field.__name__} does not support the queries argument." + with self.assertRaisesMessage(ValueError, msg): + field(Actor, queries={}) diff --git a/tests/encryption_/test_management.py b/tests/encryption_/test_management.py new file mode 100644 index 000000000..096ccab3a --- /dev/null +++ b/tests/encryption_/test_management.py @@ -0,0 +1,131 @@ +from io import StringIO + +from bson import json_util +from django.core.exceptions import ImproperlyConfigured +from django.core.management import call_command +from django.db import connections +from django.test import modify_settings + +from .models import EncryptionKey +from .test_base import EncryptionTestCase + + +@modify_settings(INSTALLED_APPS={"prepend": "django_mongodb_backend"}) +class CommandTests(EncryptionTestCase): + # Expected encrypted field maps for all Encrypted* models + expected_maps = { + "encryption__patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "queries": {"queryType": "equality"}, + }, + {"bsonType": "object", "path": "patient_record.billing"}, + ] + }, + # Equality-queryable fields + "encryption__binarymodel": { + "fields": [ + {"bsonType": "binData", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__booleanmodel": { + "fields": [{"bsonType": "bool", "path": "value", "queries": {"queryType": "equality"}}] + }, + "encryption__charmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__emailmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__genericipaddressmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": 
{"queryType": "equality"}} + ] + }, + "encryption__textmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__urlmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + # Range-queryable fields + "encryption__bigintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__datemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__datetimemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__decimalmodel": { + "fields": [{"bsonType": "decimal", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__durationmodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__floatmodel": { + "fields": [{"bsonType": "double", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__integermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__positivebigintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__positiveintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__positivesmallintegermodel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__smallintegermodel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__timemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + } + + def _compare_output(self, expected, actual): + for field in actual["fields"]: + del field["keyId"] # Can't compare dynamic value + 
self.assertEqual(expected, actual) + + def test_show_encrypted_fields_map(self): + out = StringIO() + call_command("showencryptedfieldsmap", "--database", "encrypted", verbosity=0, stdout=out) + command_output = json_util.loads(out.getvalue()) + + # Loop through each expected model + for model_key, expected in self.expected_maps.items(): + with self.subTest(model=model_key): + self.assertIn(model_key, command_output) + self._compare_output(expected, command_output[model_key]) + + def test_missing_key(self): + test_key = "encryption__patient.patient_record.ssn" + msg = ( + f"Encryption key {test_key} not found. Have migrated the " + " model?" + ) + EncryptionKey.objects.filter(key_alt_name=test_key).delete() + try: + with self.assertRaisesMessage(ImproperlyConfigured, msg): + call_command("showencryptedfieldsmap", "--database", "encrypted", verbosity=0) + finally: + # Replace the deleted key. + connections["encrypted"].client_encryption.create_data_key( + kms_provider="local", + key_alt_names=[test_key], + ) diff --git a/tests/encryption_/test_schema.py b/tests/encryption_/test_schema.py new file mode 100644 index 000000000..ba70c5545 --- /dev/null +++ b/tests/encryption_/test_schema.py @@ -0,0 +1,136 @@ +from bson.binary import Binary +from django.db import connections + +from . 
import models +from .models import EncryptionKey +from .test_base import EncryptionTestCase + + +class SchemaTests(EncryptionTestCase): + # Expected encrypted fields map per model + expected_map = { + "Patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "queries": {"queryType": "equality"}, + }, + {"bsonType": "object", "path": "patient_record.billing"}, + ] + }, + "BinaryModel": { + "fields": [ + {"bsonType": "binData", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "BooleanModel": { + "fields": [{"bsonType": "bool", "path": "value", "queries": {"queryType": "equality"}}] + }, + "CharModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "EmailModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "GenericIPAddressModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "TextModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "URLModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "BigIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "DateModel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "DateTimeModel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "DecimalModel": { + "fields": [{"bsonType": "decimal", "path": "value", "queries": {"queryType": "range"}}] + }, + "DurationModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "FloatModel": { + "fields": [{"bsonType": "double", "path": "value", "queries": {"queryType": "range"}}] + }, + "IntegerModel": { + "fields": [{"bsonType": "long", "path": "value", 
"queries": {"queryType": "range"}}] + }, + "PositiveBigIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "PositiveIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "PositiveSmallIntegerModel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "SmallIntegerModel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "TimeModel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + } + + def test_get_encrypted_fields_all_models(self): + """ + Loops through all models, + checks their encrypted fields map from the schema editor, + and compares to expected BSON type & queries mapping. + """ + # Deleting all keys is only correct only if this test includes all + # test models. This test may not be needed since it's tested when the + # test runner migrates all models. If any subTest fails, the key vault + # will be left in an inconsistent state. + EncryptionKey.objects.all().delete() + connection = connections["encrypted"] + for model_name, expected in self.expected_map.items(): + with self.subTest(model=model_name): + model_class = getattr(models, model_name) + with connection.schema_editor() as editor: + encrypted_fields = editor._get_encrypted_fields(model_class) + for field in encrypted_fields["fields"]: + del field["keyId"] # Can't compare dynamic value + self.assertEqual(encrypted_fields, expected) + + def test_key_creation_and_lookup(self): + """ + Use _get_encrypted_fields to + generate and store a data key in the vault, then + query the vault with the keyAltName. + """ + model_class = models.CharModel + test_key_alt_name = f"{model_class._meta.db_table}.value" + # Delete the test key and verify it's gone. 
+ EncryptionKey.objects.filter(key_alt_name=test_key_alt_name).delete() + with self.assertRaises(EncryptionKey.DoesNotExist): + EncryptionKey.objects.get(key_alt_name=test_key_alt_name) + # Regenerate the keyId. + with connections["encrypted"].schema_editor() as editor: + encrypted_fields = editor._get_encrypted_fields(model_class) + # Validate schema contains a keyId for the field. + field_info = encrypted_fields["fields"][0] + self.assertEqual(field_info["path"], "value") + self.assertIsInstance(field_info["keyId"], Binary) + # Lookup in key vault by the keyAltName. + key = EncryptionKey.objects.get(key_alt_name=test_key_alt_name) + self.assertEqual(key.id, field_info["keyId"]) + self.assertEqual(key.key_alt_name, [test_key_alt_name]) diff --git a/tests/raw_query_/test_raw_aggregate.py b/tests/raw_query_/test_raw_aggregate.py index 99dcd5faf..96df2f925 100644 --- a/tests/raw_query_/test_raw_aggregate.py +++ b/tests/raw_query_/test_raw_aggregate.py @@ -111,7 +111,7 @@ def assertAnnotations(self, results, expected_annotations): self.assertEqual(getattr(result, annotation), value) def test_rawqueryset_repr(self): - queryset = RawQuerySet(pipeline=[]) + queryset = RawQuerySet(pipeline=[], model=Book) self.assertEqual(repr(queryset), "") self.assertEqual(repr(queryset.query), "")