mitodl · dsubak · Oct 3, 2025 · Oct 3, 2025 · Oct 8, 2025 · Oct 8, 2025
diff --git a/main/env_validator.py b/main/env_validator.py
@@ -0,0 +1,276 @@
+"""
+Environment variable validation utilities for detecting configuration discrepancies.
+"""
+
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+def strip_comment(val):
+    return val.split("#", 1)[0].strip()
+
+
+"""
+This attempts to enforce the following rules around env files:
+- Base env files (i.e. backend.env) will contain default settings
+that are safe for all development environments
+- Local env files (i.e. backend.local.env) may contain overrides and
+settings which cannot have a sensible default (i.e. developer-specific API keys).
+It should contain everything in the example file as well
+as anything intentionally overridden
+- Example env files list only settings which cannot have a sensible default.
+It should contain the minimum required set of settings values
+necessary to get the application running.
+
+
+Behavior we want to validate:
+1) If a setting is in the example file or local file, but
+not the base file it might be missing a default and we emit a warning.
+1a) If a setting is in the example file or local file but not the
+base because there's no sensible default (i.e. values are specific to a user or local)
+we allow a "# local-required" comment on example file to suppress the warning
+2) If a setting is in the local file and the base file, but the values
+differ we emit a warning. This is to keep users from accidentally overriding defaults
+3) If a setting is in the example file but not the local file, we emit a
+warning. This indicates that we are likely missing a required local-specific setting
+"""
+
+# NB: This runs way too frequently (i.e. in tests). Need to figure out how to
+# Run only on interactive startup
+# Also rewrite so this only allows directives in the expected files
+# (i.e. local-required only in example files)
+
+
+class EnvValidator:
+    """Validates environment variable configurations and reports discrepancies."""
+
+    def __init__(self, project_root: str | None = None):
+        """
+        Initialize the environment validator.
+
+        Args:
+            project_root: Root directory of the project.
+            If None, defaults to parent of this file's directory.
+        """
+        if project_root is None:
+            project_root = Path(__file__).parent.parent
+        else:
+            project_root = Path(project_root)
+
+        self.project_root = project_root
+        self.env_dir = project_root / "env"
+
+    def _parse_env_file(self, file_path: Path) -> dict[str, dict]:
+        """
+        Parse an environment file and return a dictionary of key-value pairs
+        and any noqa-style directives.
+
+        Args:
+            file_path: Path to the environment file
+
+        Returns:
+            Dictionary mapping variable names to dicts with 'value'
+            and optional 'directive'.
+        """
+        env_vars = {}
+
+        if not file_path.exists():
+            return env_vars
+
+        with Path.open(file_path, encoding="utf-8") as f:
+            for original_line in f:
+                line = original_line.strip()
+
+                # Skip empty lines and comments
+                if not line or line.startswith("#"):
+                    continue
+
+                # Check for noqa-style directive at end of line
+                directive = None
+                if line.endswith("# local-required"):
+                    directive = "local-required"
+                    line = line[: -len("# local-required")].rstrip()
+                elif line.endswith("# suppress-warning"):
+                    directive = "suppress-warning"
+                    line = line[: -len("# suppress-warning")].rstrip()
+
+                # Parse regular env variables
+                if "=" in line:
+                    key, value = line.split("=", 1)
+                    key = key.strip()
+                    value = value.strip()
+
+                    env_vars[key] = {"value": value}
+                    if directive:
+                        env_vars[key]["directive"] = directive
+
+        return env_vars
+
+    def get_env_file_pairs(self) -> list[tuple[str, Path, Path, Path]]:
+        """
+        Get pairs of environment files to compare.
+
+        Returns:
+            List of tuples: (env_type, base_env_path, local_env_path, example_env_path)
+        """
+        pairs = []
+
+        # Define the environment file patterns
+        env_patterns = [
+            (
+                "backend",
+                "backend.env",
+                "backend.local.env",
+                "backend.local.example.env",
+            ),
+            (
+                "frontend",
+                "frontend.env",
+                "frontend.local.env",
+                "frontend.local.example.env",
+            ),
+            ("shared", "shared.env", "shared.local.env", "shared.local.example.env"),
+        ]
+
+        for env_type, base_name, local_name, example_name in env_patterns:
+            base_path = self.env_dir / base_name
+            local_path = self.env_dir / local_name
+            example_path = self.env_dir / example_name
+
+            if base_path.exists() or local_path.exists() or example_path.exists():
+                pairs.append((env_type, base_path, local_path, example_path))
+
+        return pairs
+
+    def check_example_overrides(self) -> list[str]:
+        """
+        Check for settings present in example files but not in base env files.
+        This identifies non-standard settings that are overridden in the environment.
+
+        Returns:
+            List of warning messages
+        """
+        warnings = []
+
+        for env_type, base_path, local_path, example_path in self.get_env_file_pairs():
+            base_vars = self._parse_env_file(base_path) if base_path.exists() else {}
+            example_vars = self._parse_env_file(example_path)
+
+            example_only = set(example_vars.keys()) - set(base_vars.keys())
+
+            for var_name in example_only:
+                # Only warn if the variable is present in local file
+                local_vars = self._parse_env_file(local_path)
+                if var_name in local_vars:
+                    example_value = example_vars[var_name]["value"]
+                    local_value = local_vars[var_name]["value"]
+                    if strip_comment(local_value) == strip_comment(example_value):
+                        continue
+                    # Suppress warning if example file cannot have a reasonable default
+                    if example_vars[var_name].get("directive") == "local-required":
+                        continue
+                    warnings.append(
+                        f"⚠️  {env_type.upper()}: Variable "
+                        f"'{var_name}' is set in {local_path.name} "
+                        f"but not defined in {base_path.name}. "
+                        f"This overrides a non-standard setting "
+                        f"from {example_path.name}. "
+                        f"Local value: '{local_value}', "
+                        f"Example value: '{example_value}'"
+                    )
+        return warnings
+
+    def check_local_overrides(self) -> list[str]:
+        """
+        Check for settings in local files that differ from or are absent in base files.
+
+        Returns:
+            List of warning messages
+        """
+        warnings = []
+
+        for env_type, base_path, local_path, _ in self.get_env_file_pairs():
+            if not local_path.exists():
+                continue
+
+            base_vars = self._parse_env_file(base_path) if base_path.exists() else {}
+            local_vars = self._parse_env_file(local_path)
+
+            for var_name, local_info in local_vars.items():
+                local_value = local_info["value"]
+                suppress = local_info.get("directive") == "suppress-warning"
+
+                if var_name not in base_vars:
+                    if suppress:
+                        continue
+                    warnings.append(
+                        f"⚠️  {env_type.upper()}: Variable "
+                        f"'{var_name}' is set in {local_path.name} "
+                        f"(value: '{local_value}') but "
+                        f"not defined in {base_path.name}. "
+                        f"Consider adding a default value to "
+                        f"{base_path.name} if this should be a standard setting."
+                    )
+                else:
+                    base_value = base_vars[var_name]["value"]
+                    # Compare values ignoring anything after a "#" symbol
+                    if strip_comment(base_value) == strip_comment(local_value):
+                        continue  # No warning if values match (ignoring comments)
+                    warnings.append(
+                        f"⚠️  {env_type.upper()}: Variable "
+                        f"'{var_name}' is overridden locally. "
+                        f"Base value: '{base_value}', "
+                        f"Local value: '{local_value}'. "
+                        f"Ensure the default in {base_path.name} is appropriate."
+                    )
+        return warnings
+
+    def validate_all(self) -> list[str]:
+        """
+        Run all environment validation checks.
+
+        Returns:
+            List of all warning messages
+        """
+        warnings = []
+        warnings.extend(self.check_example_overrides())
+        warnings.extend(self.check_local_overrides())
+        return warnings
+
+    def log_warnings(self, warnings: list[str]) -> None:
+        """
+        Log warning messages.
+
+        Args:
+            warnings: List of warning messages to log
+        """
+        if not warnings:
+            logger.info(
+                "✅ Environment validation passed - "
+                "no configuration discrepancies found."
+            )
+            return
+
+        logger.warning("Environment Configuration Warnings:")
+        logger.warning("=" * 60)
+        for warning in warnings:
+            logger.warning(warning)
+        logger.warning("=" * 60)
+        logger.warning(
+            "Found %s environment configuration issue(s). "
+            "Review your environment files to ensure proper configuration.",
+            len(warnings),
+        )
+
+
+def validate_environment_on_startup():
+    """
+    Validate environment configuration on startup.
+    This can be called from Django settings or management commands.
+    """
+    validator = EnvValidator()
+    warnings = validator.validate_all()
+    validator.log_warnings(warnings)
+    return warnings
diff --git a/main/management/commands/validate_env.py b/main/management/commands/validate_env.py
diff --git a/main/settings.py b/main/settings.py
@@ -866,3 +866,14 @@ def get_all_config_keys():
 OPENTELEMETRY_TRACES_BATCH_SIZE = get_int("OPENTELEMETRY_TRACES_BATCH_SIZE", 512)
 OPENTELEMETRY_EXPORT_TIMEOUT_MS = get_int("OPENTELEMETRY_EXPORT_TIMEOUT_MS", 5000)
 CANVAS_TUTORBOT_FOLDER = get_string("CANVAS_TUTORBOT_FOLDER", "web_resources/ai/tutor/")
+
+# Validate environment configuration on startup
+# Skip validation during testing or when explicitly disabled
+if not get_bool("SKIP_ENV_VALIDATION", default=False):
+    try:
+        from main.env_validator import validate_environment_on_startup
+
+        validate_environment_on_startup()
+    except Exception as e:  # noqa: BLE001
+        # We don't want to block if validation fails.
+        log.warning("Environment validation failed: %s", e)
diff --git a/main/test_env_validator.py b/main/test_env_validator.py
@@ -0,0 +1,104 @@
+import tempfile
+from pathlib import Path
+
+from main.env_validator import EnvValidator
+
+
+class TestEnvValidator:
+    def setup_env(self, files, tmpdir):
+        file_paths = {}
+        for fname, lines in files.items():
+            path = Path(tmpdir) / fname
+            with Path.open(path, "w", encoding="utf-8") as f:
+                f.write("\n".join(lines))
+            file_paths[fname] = path
+        return file_paths
+
+    def test_warn_if_setting_in_example_not_base(self):
+        files = {
+            "backend.env": ["BASE=1"],
+            "backend.local.env": ["BASE=1", "EXTRA=foo"],
+            "backend.local.example.env": ["EXTRA=bar"],
+        }
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_paths = self.setup_env(files, tmpdir)
+            env_file_pairs = [
+                (
+                    "backend",
+                    file_paths["backend.env"],
+                    file_paths["backend.local.env"],
+                    file_paths["backend.local.example.env"],
+                )
+            ]
+            validator = EnvValidator()
+            validator.get_env_file_pairs = lambda: env_file_pairs
+            warnings = validator.validate_all()
+            assert any(
+                "EXTRA" in w and "not defined in backend.env" in w for w in warnings
+            )
+
+    def test_suppress_warning_with_local_required(self):
+        files = {
+            "backend.env": ["BASE=1"],
+            "backend.local.env": ["BASE=1", "EXTRA=foo"],
+            "backend.local.example.env": ["EXTRA=bar # local-required"],
+        }
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_paths = self.setup_env(files, tmpdir)
+            env_file_pairs = [
+                (
+                    "backend",
+                    file_paths["backend.env"],
+                    file_paths["backend.local.env"],
+                    file_paths["backend.local.example.env"],
+                )
+            ]
+            validator = EnvValidator()
+            validator.get_env_file_pairs = lambda: env_file_pairs
+            warnings = validator.validate_all()
+            assert len(warnings) == 0
+
+    def test_warn_if_local_overrides_base(self):
+        files = {
+            "backend.env": ["BASE=1"],
+            "backend.local.env": ["BASE=2"],
+            "backend.local.example.env": ["BASE=1"],
+        }
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_paths = self.setup_env(files, tmpdir)
+            env_file_pairs = [
+                (
+                    "backend",
+                    file_paths["backend.env"],
+                    file_paths["backend.local.env"],
+                    file_paths["backend.local.example.env"],
+                )
+            ]
+            validator = EnvValidator()
+            validator.get_env_file_pairs = lambda: env_file_pairs
+            warnings = validator.validate_all()
+            assert any("BASE" in w and "overridden locally" in w for w in warnings)
+
+    def test_warn_if_example_not_in_local(self):
+        files = {
+            "backend.env": ["BASE=1"],
+            "backend.local.env": ["BASE=1"],
+            "backend.local.example.env": ["BASE=1", "EXTRA=bar"],
+        }
+        with tempfile.TemporaryDirectory() as tmpdir:
+            file_paths = self.setup_env(files, tmpdir)
+            env_file_pairs = [
+                (
+                    "backend",
+                    file_paths["backend.env"],
+                    file_paths["backend.local.env"],
+                    file_paths["backend.local.example.env"],
+                )
+            ]
+            validator = EnvValidator()
+            validator.get_env_file_pairs = lambda: env_file_pairs
+            warnings = validator.validate_all()
+            assert any(
+                "EXTRA" in w and "missing a required local-specific setting" in w
+                for w in warnings
+            )