From ef05bd994feac6e8ea6a99b83b33abf7e2b42f71 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Thu, 13 Nov 2025 15:36:12 +0000
Subject: [PATCH 01/15] fix(ingestion/grafana): fix fails caused by text panels
 in ingestion

---
 .../ingestion/source/grafana/grafana_api.py   |   7 +-
 .../source/grafana/grafana_config.py          |   5 +
 .../source/grafana/grafana_source.py          |   1 +
 .../ingestion/source/grafana/models.py        | 156 +++++++++---------
 .../tests/unit/grafana/test_grafana_models.py | 111 +++++++++++++
 5 files changed, 205 insertions(+), 75 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_api.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_api.py
index 95bd857eca7a48..1c7d50ca43ec75 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_api.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_api.py
@@ -23,11 +23,13 @@ def __init__(
         verify_ssl: bool,
         page_size: int,
         report: GrafanaSourceReport,
+        skip_text_panels: bool = False,
     ) -> None:
         self.base_url = base_url
         self.verify_ssl = verify_ssl
         self.page_size = page_size
         self.report = report
+        self.skip_text_panels = skip_text_panels
         self.session = self._create_session(token)
 
     def _create_session(self, token: SecretStr) -> requests.Session:
@@ -88,7 +90,10 @@ def get_dashboard(self, uid: str) -> Optional[Dashboard]:
         try:
             response = self.session.get(f"{self.base_url}/api/dashboards/uid/{uid}")
             response.raise_for_status()
-            return Dashboard.model_validate(response.json())
+            dashboard_data = response.json()
+            if self.skip_text_panels:
+                dashboard_data["_skip_text_panels"] = True
+            return Dashboard.model_validate(dashboard_data)
         except requests.exceptions.RequestException as e:
             self.report.warning(
                 title="Dashboard Fetch Error",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py
index e84cdab6da9f9a..b897e25daf0a8a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py
@@ -80,6 +80,11 @@ class GrafanaSourceConfig(
     ingest_owners: bool = Field(
         default=True, description="Whether to ingest dashboard ownership information"
     )
+    skip_text_panels: bool = Field(
+        default=False,
+        description="Whether to skip text panels during ingestion. "
+        "Text panels don't contain data visualizations and may not be relevant for data lineage.",
+    )
 
     include_lineage: bool = Field(
         default=True,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py
index dc26e7c4a0f631..99cb796de7233e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py
@@ -154,6 +154,7 @@ def __init__(self, config: GrafanaSourceConfig, ctx: PipelineContext):
             verify_ssl=self.config.verify_ssl,
             page_size=self.config.page_size,
             report=self.report,
+            skip_text_panels=self.config.skip_text_panels,
         )
 
         # Initialize lineage extractor with graph
diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
index c035c91037f31b..695b48d17458f2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
@@ -11,49 +11,47 @@
 import logging
 from typing import Any, Dict, List, Optional
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 from datahub.emitter.mcp_builder import ContainerKey
 
 logger = logging.getLogger(__name__)
-# Grafana-specific type definitions for better type safety
-GrafanaQueryTarget = Dict[
-    str, Any
-]  # Query targets: refId, expr/query, datasource, hide, etc.
-GrafanaFieldConfig = Dict[
-    str, Any
-]  # Field config: defaults, overrides, display settings
-GrafanaTransformation = Dict[str, Any]  # Transformations: id, options
+
+GrafanaQueryTarget = Dict[str, Any]
+GrafanaFieldConfig = Dict[str, Any]
+GrafanaTransformation = Dict[str, Any]
 
 
 class _GrafanaBaseModel(BaseModel):
-    model_config = ConfigDict(coerce_numbers_to_str=True)
+    model_config = ConfigDict(
+        coerce_numbers_to_str=True,
+        populate_by_name=True,
+        extra="ignore",
+    )
 
 
 class DatasourceRef(_GrafanaBaseModel):
     """Reference to a Grafana datasource."""
 
-    type: Optional[str] = None  # Datasource type (prometheus, mysql, postgres, etc.)
-    uid: Optional[str] = None  # Datasource unique identifier
-    name: Optional[str] = None  # Datasource display name
+    type: Optional[str] = None
+    uid: Optional[str] = None
+    name: Optional[str] = None
 
 
 class Panel(_GrafanaBaseModel):
     """Represents a Grafana dashboard panel."""
 
     id: str
-    title: str
+    title: Optional[str] = (
+        None  # Optional: text panels in Grafana v11+ don't have titles
+    )
     description: str = ""
     type: Optional[str] = None
-    # Query targets - each contains refId (A,B,C...), query/expr, datasource ref, etc.
     query_targets: List[GrafanaQueryTarget] = Field(
         default_factory=list, alias="targets"
     )
-    # Datasource reference - contains type, uid, name
     datasource_ref: Optional[DatasourceRef] = Field(default=None, alias="datasource")
-    # Field configuration - display settings, defaults, overrides
     field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig")
-    # Data transformations - each contains id and transformation-specific options
     transformations: List[GrafanaTransformation] = Field(default_factory=list)
 
 
@@ -64,74 +62,84 @@ class Dashboard(_GrafanaBaseModel):
     title: str
     description: str = ""
     version: Optional[str] = None
-    panels: List[Panel]
-    tags: List[str]
+    panels: List[Panel] = Field(default_factory=list)
+    tags: List[str] = Field(default_factory=list)
     timezone: Optional[str] = None
     refresh: Optional[str] = None
     schema_version: Optional[str] = Field(default=None, alias="schemaVersion")
-    folder_id: Optional[str] = Field(default=None, alias="meta.folderId")
+    folder_id: Optional[str] = None
     created_by: Optional[str] = None
 
     @staticmethod
-    def extract_panels(panels_data: List[Dict[str, Any]]) -> List[Panel]:
-        """Extract panels, including nested ones."""
+    def extract_panels(
+        panels_data: List[Dict[str, Any]], skip_text_panels: bool = False
+    ) -> List[Panel]:
+        """Extract panels, including nested ones, skipping invalid panels."""
         panels: List[Panel] = []
         for panel_data in panels_data:
             if panel_data.get("type") == "row" and "panels" in panel_data:
-                panels.extend(
-                    Panel.model_validate(p)
-                    for p in panel_data["panels"]
-                    if p.get("type") != "row"
-                )
+                for p in panel_data["panels"]:
+                    if p.get("type") != "row":
+                        if skip_text_panels and p.get("type") == "text":
+                            continue
+                        try:
+                            panels.append(Panel.model_validate(p))
+                        except Exception as e:
+                            logger.warning(
+                                f"Error parsing panel (id={p.get('id')}, type={p.get('type')}): {e}. Skipping this panel."
+                            )
             elif panel_data.get("type") != "row":
-                panels.append(Panel.model_validate(panel_data))
+                if skip_text_panels and panel_data.get("type") == "text":
+                    continue
+                try:
+                    panels.append(Panel.model_validate(panel_data))
+                except Exception as e:
+                    logger.warning(
+                        f"Error parsing panel (id={panel_data.get('id')}, type={panel_data.get('type')}): {e}. Skipping this panel."
+                    )
         return panels
 
+    @field_validator("refresh", mode="before")
+    @classmethod
+    def convert_refresh_to_string(cls, v: Any) -> Optional[str]:
+        """Convert boolean refresh values to strings for compatibility."""
+        if isinstance(v, bool):
+            return str(v)
+        return v
+
+    @model_validator(mode="before")
     @classmethod
-    def model_validate(
-        cls,
-        obj: Any,
-        *,
-        strict: Optional[bool] = None,
-        from_attributes: Optional[bool] = None,
-        context: Optional[Any] = None,
-        by_alias: Optional[bool] = None,
-        by_name: Optional[bool] = None,
-    ) -> "Dashboard":
-        """Custom parsing to handle nested panel extraction."""
-        # Handle both direct dashboard data and nested structure with 'dashboard' key
-        dashboard_data = obj.get("dashboard", obj)
-
-        _panel_data = dashboard_data.get("panels", [])
-        panels = []
-        try:
-            panels = cls.extract_panels(_panel_data)
-        except Exception as e:
-            logger.warning(
-                f"Error extracting panels from dashboard for dashboard panels {_panel_data} : {e}"
-            )
-
-        # Extract meta.folderId from nested structure
-        meta = dashboard_data.get("meta", {})
-        folder_id = meta.get("folderId")
-
-        # Create dashboard data without meta to avoid conflicts
-        dashboard_dict = {**dashboard_data, "panels": panels, "folder_id": folder_id}
-        if "meta" in dashboard_dict:
-            del dashboard_dict["meta"]
-
-        # Handle refresh field type mismatch - convert boolean to string
-        if "refresh" in dashboard_dict and isinstance(dashboard_dict["refresh"], bool):
-            dashboard_dict["refresh"] = str(dashboard_dict["refresh"])
-
-        return super().model_validate(
-            dashboard_dict,
-            strict=strict,
-            from_attributes=from_attributes,
-            context=context,
-            by_alias=by_alias,
-            by_name=by_name,
-        )
+    def extract_dashboard_data(cls, data: Any) -> Dict[str, Any]:
+        """Extract dashboard data from nested structure and process panels."""
+        if isinstance(data, dict):
+            dashboard_data = data.get("dashboard", data)
+
+            _panel_data = dashboard_data.get("panels", [])
+            panels = []
+
+            skip_text_panels = dashboard_data.get("_skip_text_panels", False)
+
+            if _panel_data and all(isinstance(p, dict) for p in _panel_data):
+                try:
+                    panels = cls.extract_panels(_panel_data, skip_text_panels)
+                except Exception as e:
+                    logger.warning(f"Error extracting panels from dashboard: {e}")
+            else:
+                panels = _panel_data
+
+            meta = dashboard_data.get("meta", {})
+            folder_id = meta.get("folderId") if meta else None
+
+            result = {**dashboard_data, "panels": panels}
+            if folder_id is not None:
+                result["folder_id"] = folder_id
+
+            result.pop("meta", None)
+            result.pop("_skip_text_panels", None)
+
+            return result
+
+        return data
 
 
 class Folder(_GrafanaBaseModel):
@@ -152,4 +160,4 @@ class DashboardContainerKey(ContainerKey):
     """Key for identifying a Grafana dashboard."""
 
     dashboard_id: str
-    folder_id: Optional[str] = None  # Reference to parent folder
+    folder_id: Optional[str] = None
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
index 20f69d75136d33..2ae29f763097c6 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
@@ -88,6 +88,117 @@ def test_dashboard_nested_panels():
     assert dashboard.panels[1].title == "Top Level Panel"
 
 
+def test_panel_without_title():
+    """Test that text panels without titles are parsed successfully."""
+    panel_data: Dict[str, Any] = {
+        "id": "1",
+        "type": "text",
+        "datasource": {"type": "-- Grafana --", "uid": "-- Grafana --"},
+    }
+
+    panel = Panel.model_validate(panel_data)
+    assert panel.id == "1"
+    assert panel.title is None
+    assert panel.type == "text"
+
+
+def test_dashboard_with_text_panel():
+    """Test that dashboards with text panels (no title) are parsed successfully."""
+    dashboard_data = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "description": "",
+            "version": "1",
+            "panels": [
+                {"id": "1", "title": "Regular Panel", "type": "graph"},
+                {
+                    "id": "2",
+                    "type": "text",
+                    "datasource": {"type": "-- Grafana --", "uid": "-- Grafana --"},
+                },
+            ],
+            "tags": [],
+        }
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    assert len(dashboard.panels) == 2
+    assert dashboard.panels[0].title == "Regular Panel"
+    assert dashboard.panels[1].title is None
+    assert dashboard.panels[1].type == "text"
+
+
+def test_dashboard_with_invalid_panel_skips():
+    """Test that invalid panels are skipped with a warning instead of failing."""
+    dashboard_data = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "description": "",
+            "version": "1",
+            "panels": [
+                {"id": "1", "title": "Valid Panel", "type": "graph"},
+                # Missing required 'id' field - should be skipped
+                {"title": "Invalid Panel", "type": "graph"},
+                {"id": "3", "title": "Another Valid Panel", "type": "table"},
+            ],
+            "tags": [],
+        }
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    # Should have 2 panels (invalid one is skipped)
+    assert len(dashboard.panels) == 2
+    assert dashboard.panels[0].title == "Valid Panel"
+    assert dashboard.panels[1].title == "Another Valid Panel"
+
+
+def test_dashboard_skip_text_panels():
+    """Test that text panels can be skipped when skip_text_panels is True."""
+    dashboard_data_with_text = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "description": "",
+            "version": "1",
+            "panels": [
+                {"id": "1", "title": "Regular Panel", "type": "graph"},
+                {"id": "2", "type": "text"},
+                {"id": "3", "title": "Another Panel", "type": "table"},
+            ],
+            "tags": [],
+        }
+    }
+
+    # With skip_text_panels=False (default), all panels should be included
+    dashboard_with_text = Dashboard.model_validate(dashboard_data_with_text)
+    assert len(dashboard_with_text.panels) == 3
+
+    # With skip_text_panels=True, text panels should be skipped
+    dashboard_data_skip_text = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "description": "",
+            "version": "1",
+            "panels": [
+                {"id": "1", "title": "Regular Panel", "type": "graph"},
+                {"id": "2", "type": "text"},
+                {"id": "3", "title": "Another Panel", "type": "table"},
+            ],
+            "tags": [],
+            "_skip_text_panels": True,
+        }
+    }
+    dashboard_no_text = Dashboard.model_validate(dashboard_data_skip_text)
+    assert len(dashboard_no_text.panels) == 2
+    assert dashboard_no_text.panels[0].title == "Regular Panel"
+    assert dashboard_no_text.panels[0].type == "graph"
+    assert dashboard_no_text.panels[1].title == "Another Panel"
+    assert dashboard_no_text.panels[1].type == "table"
+
+
 def test_folder():
     folder_data = {"id": "1", "title": "Test Folder", "description": "Test Description"}
 

From 4d4845135b0f951e5949f3e9745d0cea50d9102f Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 10:10:45 +0000
Subject: [PATCH 02/15] CUS-6824

---
 .../ingestion/source/grafana/models.py        |  39 ++-
 .../tests/integration/grafana/test_grafana.py |  14 +-
 .../tests/unit/grafana/test_grafana_models.py | 243 ++++++++++++++++++
 .../unit/grafana/test_grafana_validation.py   | 213 +++++++++++++++
 4 files changed, 494 insertions(+), 15 deletions(-)
 create mode 100644 metadata-ingestion/tests/unit/grafana/test_grafana_validation.py

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
index db11ff527948b1..15e6e9f3d49a23 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
@@ -54,6 +54,23 @@ class Panel(_GrafanaBaseModel):
     field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig")
     transformations: List[GrafanaTransformation] = Field(default_factory=list)
 
+    @model_validator(mode="before")
+    @classmethod
+    def ensure_panel_defaults(cls, data: Any) -> Dict[str, Any]:
+        """Set defaults for optional fields and normalize data types."""
+        if isinstance(data, dict):
+            result = dict(data)
+            result.setdefault("description", "")
+            result.setdefault("targets", [])
+            result.setdefault("transformations", [])
+            result.setdefault("fieldConfig", {})
+
+            if "id" in result and isinstance(result["id"], int):
+                result["id"] = str(result["id"])
+
+            return result
+        return data
+
 
 class Dashboard(_GrafanaBaseModel):
     """Represents a Grafana dashboard."""
@@ -85,8 +102,13 @@ def extract_panels(
                         try:
                             panels.append(Panel.model_validate(p))
                         except Exception as e:
+                            logger.debug(
+                                f"Error parsing nested panel (id={p.get('id')}, type={p.get('type')}): {e}. "
+                                f"Panel data: {p}. Skipping this panel."
+                            )
                             logger.warning(
-                                f"Error parsing panel (id={p.get('id')}, type={p.get('type')}): {e}. Skipping this panel."
+                                f"Skipping panel with validation errors (id={p.get('id')}, type={p.get('type')}): "
+                                f"Missing or invalid fields. Enable debug logging for details."
                             )
             elif panel_data.get("type") != "row":
                 if skip_text_panels and panel_data.get("type") == "text":
@@ -94,8 +116,13 @@ def extract_panels(
                 try:
                     panels.append(Panel.model_validate(panel_data))
                 except Exception as e:
+                    logger.debug(
+                        f"Error parsing panel (id={panel_data.get('id')}, type={panel_data.get('type')}): {e}. "
+                        f"Panel data: {panel_data}. Skipping this panel."
+                    )
                     logger.warning(
-                        f"Error parsing panel (id={panel_data.get('id')}, type={panel_data.get('type')}): {e}. Skipping this panel."
+                        f"Skipping panel with validation errors (id={panel_data.get('id')}, type={panel_data.get('type')}): "
+                        f"Missing or invalid fields. Enable debug logging for details."
                     )
         return panels
 
@@ -134,6 +161,14 @@ def extract_dashboard_data(cls, data: Any) -> Dict[str, Any]:
             if folder_id is not None:
                 result["folder_id"] = folder_id
 
+            # Set defaults for optional fields that may be missing from API responses
+            result.setdefault("tags", [])
+            result.setdefault("description", "")
+            result.setdefault("version", None)
+            result.setdefault("timezone", None)
+            result.setdefault("refresh", None)
+            result.setdefault("created_by", None)
+
             result.pop("meta", None)
             result.pop("_skip_text_panels", None)
 
diff --git a/metadata-ingestion/tests/integration/grafana/test_grafana.py b/metadata-ingestion/tests/integration/grafana/test_grafana.py
index 8b6f8fe9a2bdc5..9607699fb65ad9 100644
--- a/metadata-ingestion/tests/integration/grafana/test_grafana.py
+++ b/metadata-ingestion/tests/integration/grafana/test_grafana.py
@@ -19,9 +19,6 @@
 
 FROZEN_TIME = "2024-07-12 12:00:00"
 
-# Expected dashboards that should be provisioned during the test setup
-# If new dashboards are added to the test setup, they should be listed here
-# to ensure provisioning is complete before running ingestion tests
 EXPECTED_DASHBOARDS = {"Test Integration Dashboard"}
 
 logger = logging.getLogger(__name__)
@@ -142,10 +139,8 @@ def loaded_grafana(docker_compose_runner, test_resources_dir):
     with docker_compose_runner(
         test_resources_dir / "docker-compose.yml", "grafana"
     ) as docker_services:
-        # Wait for all services to be ready
         wait_for_port(docker_services, "postgres", 5432, timeout=90)
 
-        # Prometheus container doesn't have bash, so use a simple HTTP check
         def check_prometheus_ready():
             try:
                 prometheus_port = docker_services.port_for("prometheus", 9090)
@@ -179,12 +174,9 @@ def check_prometheus_ready():
 def verify_grafana_api_ready(docker_services: pytest_docker.plugin.Services) -> None:
     """Robust verification that Grafana API is fully accessible after health check passes"""
 
-    # Configure requests session with retries
     session = build_retry_session(
         total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504, 429]
     )
-
-    # Wait for API endpoints to be fully ready (health check might pass but API still initializing)
     for attempt in tenacity.Retrying(
         stop=tenacity.stop_after_attempt(60), wait=tenacity.wait_fixed(3), reraise=True
     ):
@@ -238,11 +230,7 @@ def verify_grafana_fully_ready(
 
 
 def verify_grafana_entities_provisioned(timeout: int = 180) -> None:
-    """Wait for Grafana entities to be provisioned before running ingestion tests
-
-    This function can be extended in the future to validate other entity types
-    like data sources, folders, etc.
-    """
+    """Wait for Grafana entities to be provisioned before running ingestion tests"""
     session = build_retry_session(
         total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504, 429]
     )
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
index 2ae29f763097c6..7e30eaaaa3e859 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
@@ -206,3 +206,246 @@ def test_folder():
     assert folder.id == "1"
     assert folder.title == "Test Folder"
     assert folder.description == "Test Description"
+
+
+# Tests for validation fixes - handling missing fields gracefully
+
+
+def test_dashboard_missing_tags_field():
+    """Test that dashboard without tags field gets default empty list."""
+    dashboard_data = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "panels": [],
+            # Missing 'tags' field - should default to []
+        }
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    assert dashboard.uid == "dash1"
+    assert dashboard.title == "Test Dashboard"
+    assert dashboard.tags == []  # Should default to empty list
+    assert dashboard.description == ""  # Should default to empty string
+
+
+def test_dashboard_missing_optional_fields():
+    """Test that dashboard with minimal fields gets appropriate defaults."""
+    dashboard_data = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "panels": [],
+            # Missing: tags, description, version, timezone, refresh, created_by
+        }
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    assert dashboard.uid == "dash1"
+    assert dashboard.title == "Test Dashboard"
+    assert dashboard.tags == []
+    assert dashboard.description == ""
+    assert dashboard.version is None
+    assert dashboard.timezone is None
+    assert dashboard.refresh is None
+    assert dashboard.created_by is None
+
+
+def test_panel_missing_optional_fields():
+    """Test that panel with minimal fields gets appropriate defaults."""
+    panel_data = {
+        "id": 123,  # Integer ID - should be converted to string
+        "title": "Test Panel",
+        # Missing: description, targets, transformations, fieldConfig
+    }
+
+    panel = Panel.model_validate(panel_data)
+    assert panel.id == "123"  # Should be converted to string
+    assert panel.title == "Test Panel"
+    assert panel.description == ""  # Should default to empty string
+    assert panel.query_targets == []  # Should default to empty list
+    assert panel.transformations == []  # Should default to empty list
+    assert panel.field_config == {}  # Should default to empty dict
+
+
+def test_panel_integer_id_conversion():
+    """Test that panel IDs are converted from int to string."""
+    panel_data = {
+        "id": 456,  # Integer ID
+        "title": "Test Panel",
+        "type": "graph",
+    }
+
+    panel = Panel.model_validate(panel_data)
+    assert panel.id == "456"
+    assert isinstance(panel.id, str)
+
+
+def test_dashboard_with_panels_missing_fields():
+    """Test dashboard with panels that have missing fields."""
+    dashboard_data = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "panels": [
+                {
+                    "id": 1,  # Integer ID
+                    "title": "Panel 1",
+                    # Missing description, targets, etc.
+                },
+                {
+                    "id": "2",  # String ID
+                    # Missing title (should be None for text panels)
+                    "type": "text",
+                },
+            ],
+        }
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    assert len(dashboard.panels) == 2
+
+    # First panel
+    panel1 = dashboard.panels[0]
+    assert panel1.id == "1"  # Converted to string
+    assert panel1.title == "Panel 1"
+    assert panel1.description == ""
+    assert panel1.query_targets == []
+
+    # Second panel
+    panel2 = dashboard.panels[1]
+    assert panel2.id == "2"
+    assert panel2.title is None  # No title provided
+    assert panel2.type == "text"
+    assert panel2.description == ""
+
+
+def test_dashboard_from_grafana_api_response():
+    """Test parsing a realistic Grafana API response with missing fields."""
+    # Simulates a real Grafana API response that might be missing some fields
+    api_response = {
+        "dashboard": {
+            "uid": "abc123",
+            "title": "Production Dashboard",
+            "id": 450,
+            "panels": [
+                {
+                    "id": 1,
+                    "title": "CPU Usage",
+                    "type": "graph",
+                    "targets": [{"expr": "cpu_usage", "refId": "A"}],
+                },
+                {
+                    "id": 2,
+                    "type": "text",
+                    # No title field for text panel
+                    "datasource": {"type": "-- Grafana --", "uid": "-- Grafana --"},
+                },
+            ],
+            "time": {"from": "now-1h", "to": "now"},
+            "refresh": "5s",
+            "schemaVersion": 30,
+            "version": 1,
+            "meta": {
+                "type": "db",
+                "canSave": True,
+                "canEdit": True,
+                "canAdmin": True,
+                "canStar": True,
+                "slug": "production-dashboard",
+                "url": "/d/abc123/production-dashboard",
+                "expires": "0001-01-01T00:00:00Z",
+                "created": "2024-01-01T10:00:00Z",
+                "updated": "2024-01-02T15:30:00Z",
+                "updatedBy": "admin",
+                "createdBy": "admin",
+                "version": 1,
+                "hasAcl": False,
+                "isFolder": False,
+                "folderId": 0,
+                "folderUid": "",
+                "folderTitle": "General",
+                "folderUrl": "",
+                "provisioned": False,
+                "provisionedExternalId": "",
+            },
+            # Notice: missing 'tags' field entirely
+        }
+    }
+
+    dashboard = Dashboard.model_validate(api_response)
+    assert dashboard.uid == "abc123"
+    assert dashboard.title == "Production Dashboard"
+    assert dashboard.tags == []  # Should default to empty list
+    assert dashboard.version == "1"  # Should be converted to string
+    assert dashboard.refresh == "5s"
+    assert dashboard.folder_id == "0"  # Extracted from meta.folderId
+    assert len(dashboard.panels) == 2
+
+    # First panel should have defaults applied
+    panel1 = dashboard.panels[0]
+    assert panel1.id == "1"
+    assert panel1.title == "CPU Usage"
+    assert panel1.description == ""
+    assert len(panel1.query_targets) == 1
+
+    # Second panel (text) should handle missing title
+    panel2 = dashboard.panels[1]
+    assert panel2.id == "2"
+    assert panel2.title is None
+    assert panel2.type == "text"
+
+
+def test_dashboard_refresh_boolean_conversion():
+    """Test that boolean refresh values are converted to strings."""
+    dashboard_data = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "panels": [],
+            "refresh": False,  # Boolean value
+        }
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    assert dashboard.refresh == "False"  # Should be converted to string
+
+
+def test_nested_panels_with_missing_fields():
+    """Test nested panels (in row panels) with missing fields."""
+    dashboard_data = {
+        "dashboard": {
+            "uid": "dash1",
+            "title": "Test Dashboard",
+            "panels": [
+                {
+                    "type": "row",
+                    "panels": [
+                        {
+                            "id": 1,
+                            "title": "Nested Panel 1",
+                            # Missing description, targets, etc.
+                        },
+                        {
+                            "id": 2,
+                            # Missing title and other fields
+                            "type": "text",
+                        },
+                    ],
+                }
+            ],
+        }
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    assert len(dashboard.panels) == 2  # Both nested panels should be extracted
+
+    panel1 = dashboard.panels[0]
+    assert panel1.id == "1"
+    assert panel1.title == "Nested Panel 1"
+    assert panel1.description == ""
+
+    panel2 = dashboard.panels[1]
+    assert panel2.id == "2"
+    assert panel2.title is None
+    assert panel2.type == "text"
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
new file mode 100644
index 00000000000000..9a5efa8c087a6c
--- /dev/null
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
@@ -0,0 +1,213 @@
+from typing import Any, Dict
+
+from datahub.ingestion.source.grafana.models import Dashboard, Panel
+
+
+def test_dashboard_missing_tags_validation():
+    """Test dashboard validation when tags field is missing."""
+    # Dashboard data without tags field
+    dashboard_data = {
+        "id": 450,
+        "panels": [],
+        "title": "Some Dashboard",
+        "uid": "dashboard-uid",
+        "version": 1,
+        "folder_id": None,
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    assert dashboard.uid == "dashboard-uid"
+    assert dashboard.title == "Some Dashboard"
+    assert dashboard.tags == []
+    assert dashboard.version == "1"
+    assert dashboard.folder_id is None
+
+
+def test_panel_validation_with_missing_fields():
+    """Test panel validation when optional fields are missing."""
+    panel_data_list: list[Dict[str, Any]] = [
+        {
+            "id": 1,
+            "type": "graph",
+        },
+        {
+            "id": 2,
+        },
+        {
+            "id": 3,
+            "title": "Chart with missing datasource",
+            "type": "graph",
+        },
+    ]
+
+    for panel_data in panel_data_list:
+        panel = Panel.model_validate(panel_data)
+        assert panel.id == str(panel_data["id"])
+        assert panel.description == ""
+        assert panel.query_targets == []
+        assert panel.transformations == []
+        assert panel.field_config == {}
+
+
+def test_dashboard_completely_minimal():
+    """Test dashboard with only absolutely required fields."""
+    minimal_dashboard = {
+        "uid": "minimal-dash",
+        "title": "Minimal Dashboard",
+    }
+
+    dashboard = Dashboard.model_validate(minimal_dashboard)
+    assert dashboard.uid == "minimal-dash"
+    assert dashboard.title == "Minimal Dashboard"
+    assert dashboard.tags == []
+    assert dashboard.description == ""
+    assert dashboard.panels == []
+    assert dashboard.version is None
+    assert dashboard.timezone is None
+    assert dashboard.refresh is None
+    assert dashboard.created_by is None
+    assert dashboard.folder_id is None
+
+
+def test_panel_completely_minimal():
+    """Test panel with only absolutely required fields."""
+    minimal_panel = {"id": 999}
+
+    panel = Panel.model_validate(minimal_panel)
+    assert panel.id == "999"
+    assert panel.title is None
+    assert panel.description == ""
+    assert panel.type is None
+    assert panel.query_targets == []
+    assert panel.datasource_ref is None
+    assert panel.field_config == {}
+    assert panel.transformations == []
+
+
+def test_dashboard_with_incomplete_panels():
+    """Test dashboard validation containing panels with missing optional fields."""
+    dashboard_data = {
+        "uid": "incomplete-dash",
+        "title": "Dashboard with Incomplete Panels",
+        "panels": [
+            {
+                "id": 1,
+                "type": "graph",
+            },
+            {
+                "id": 2,
+            },
+            {
+                "id": 3,
+                "title": "Panel with Some Fields",
+                "type": "table",
+            },
+        ],
+    }
+
+    dashboard = Dashboard.model_validate(dashboard_data)
+    assert dashboard.uid == "incomplete-dash"
+    assert dashboard.title == "Dashboard with Incomplete Panels"
+    assert dashboard.tags == []
+    assert dashboard.description == ""
+    assert len(dashboard.panels) == 3
+    panel1 = dashboard.panels[0]
+    assert panel1.id == "1"
+    assert panel1.type == "graph"
+    assert panel1.title is None
+    assert panel1.description == ""
+    assert panel1.query_targets == []
+
+    panel2 = dashboard.panels[1]
+    assert panel2.id == "2"
+    assert panel2.title is None
+    assert panel2.type is None  # Allowed to be None
+    assert panel2.description == ""
+    assert panel2.query_targets == []
+
+    panel3 = dashboard.panels[2]
+    assert panel3.id == "3"
+    assert panel3.title == "Panel with Some Fields"
+    assert panel3.type == "table"
+    assert panel3.description == ""
+    assert panel3.query_targets == []
+
+
+def test_realistic_grafana_api_response():
+    """Test validation with a realistic Grafana API response format."""
+    # Simulates a typical Grafana API response with some optional fields missing
+    real_response = {
+        "dashboard": {
+            "id": 450,
+            "uid": "real-dashboard",
+            "title": "Production Metrics",
+            "url": "/d/real-dashboard/production-metrics",
+            "slug": "production-metrics",
+            "type": "db",
+            "panels": [
+                {
+                    "id": 1,
+                    "title": "CPU Usage",
+                    "type": "graph",
+                    "targets": [{"expr": "cpu_usage_percent", "refId": "A"}],
+                    "datasource": {"type": "prometheus", "uid": "prometheus-uid"},
+                },
+                {
+                    "id": 2,
+                    "type": "text",
+                    # Text panel with no title - common in Grafana
+                    "datasource": {"type": "-- Grafana --", "uid": "-- Grafana --"},
+                },
+            ],
+            "time": {"from": "now-1h", "to": "now"},
+            "refresh": "30s",
+            "schemaVersion": 30,
+            "version": 5,
+            "meta": {
+                "type": "db",
+                "canSave": True,
+                "canEdit": True,
+                "slug": "production-metrics",
+                "url": "/d/real-dashboard/production-metrics",
+                "expires": "0001-01-01T00:00:00Z",
+                "created": "2024-01-01T10:00:00Z",
+                "updated": "2024-01-15T14:30:00Z",
+                "updatedBy": "admin",
+                "createdBy": "admin",
+                "version": 5,
+                "hasAcl": False,
+                "isFolder": False,
+                "folderId": 1,
+                "folderUid": "general",
+                "folderTitle": "General",
+                "folderUrl": "",
+                "provisioned": False,
+                "provisionedExternalId": "",
+            },
+            # Note: 'tags' and 'description' fields are not included
+        }
+    }
+
+    dashboard = Dashboard.model_validate(real_response)
+    assert dashboard.uid == "real-dashboard"
+    assert dashboard.title == "Production Metrics"
+    assert dashboard.tags == []  # Should default to empty list
+    assert dashboard.description == ""  # Should default to empty string
+    assert dashboard.version == "5"  # Should be converted to string
+    assert dashboard.refresh == "30s"
+    assert dashboard.folder_id == "1"  # Extracted from meta
+    assert len(dashboard.panels) == 2
+
+    # Verify panels are processed correctly
+    cpu_panel = dashboard.panels[0]
+    assert cpu_panel.id == "1"
+    assert cpu_panel.title == "CPU Usage"
+    assert cpu_panel.type == "graph"
+    assert cpu_panel.description == ""  # Default
+    assert len(cpu_panel.query_targets) == 1
+
+    text_panel = dashboard.panels[1]
+    assert text_panel.id == "2"
+    assert text_panel.title is None  # Text panels often have no title
+    assert text_panel.type == "text"
+    assert text_panel.description == ""  # Default

From 2c0b1e45a5821d534bee8f99f81caf37e19e61c0 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 18:03:08 +0000
Subject: [PATCH 03/15] fixing issues

---
 .../ingestion/source/grafana/field_utils.py   | 19 +++--
 .../ingestion/source/grafana/models.py        | 74 ++++++++++++----
 .../unit/grafana/test_grafana_field_utils.py  | 12 +++
 .../unit/grafana/test_grafana_validation.py   | 85 +++++++++++++++++++
 4 files changed, 167 insertions(+), 23 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
index 2164aeca4d7889..026a6a4a0d8bb8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
@@ -77,8 +77,8 @@ def extract_raw_sql_fields(
     schema_aware = False
 
     if panel and panel.datasource_ref and connection_to_platform_map:
-        ds_type = panel.datasource_ref.type or "unknown"
-        ds_uid = panel.datasource_ref.uid or "unknown"
+        ds_type = getattr(panel.datasource_ref, "type", None) or "unknown"
+        ds_uid = getattr(panel.datasource_ref, "uid", None) or "unknown"
 
         # Try to find mapping by datasource UID first, then by type
         platform_config = connection_to_platform_map.get(
@@ -227,13 +227,16 @@ def extract_fields_from_panel(
 
 
 def extract_fields_from_targets(
-    targets: List[Dict[str, Any]],
+    targets: Optional[List[Dict[str, Any]]],
     panel: Optional[Panel] = None,
     connection_to_platform_map: Optional[Dict[str, Any]] = None,
     graph: Optional[DataHubGraph] = None,
     report: Optional[Any] = None,
 ) -> List[SchemaFieldClass]:
     """Extract fields from panel targets."""
+    if targets is None:
+        return []
+
     fields = []
     for target in targets:
         fields.extend(extract_sql_column_fields(target))
@@ -261,9 +264,12 @@ def extract_time_format_fields(target: Dict[str, Any]) -> List[SchemaFieldClass]
 
 
 def get_fields_from_field_config(
-    field_config: Dict[str, Any],
+    field_config: Optional[Dict[str, Any]],
 ) -> List[SchemaFieldClass]:
     """Extract fields from field configuration."""
+    if field_config is None:
+        return []
+
     fields = []
     defaults = field_config.get("defaults", {})
     unit = defaults.get("unit")
@@ -290,9 +296,12 @@ def get_fields_from_field_config(
 
 
 def get_fields_from_transformations(
-    transformations: List[Dict[str, Any]],
+    transformations: Optional[List[Dict[str, Any]]],
 ) -> List[SchemaFieldClass]:
     """Extract fields from transformations."""
+    if transformations is None:
+        return []
+
     fields = []
     for transform in transformations:
         if transform.get("type") == "organize":
diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
index 15e6e9f3d49a23..40242e6067e51f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
@@ -54,19 +54,49 @@ class Panel(_GrafanaBaseModel):
     field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig")
     transformations: List[GrafanaTransformation] = Field(default_factory=list)
 
+    @staticmethod
+    def _ensure_dict_field(
+        data: Dict[str, Any], field_name: str, default: Dict[str, Any]
+    ) -> None:
+        """Ensure a field is a dict, converting None to the default dict."""
+        if data.get(field_name) is None:
+            data[field_name] = default
+        else:
+            data.setdefault(field_name, default)
+
+    @staticmethod
+    def _ensure_list_field(
+        data: Dict[str, Any], field_name: str, default: List[Any]
+    ) -> None:
+        """Ensure a field is a list, converting None to the default list."""
+        if data.get(field_name) is None:
+            data[field_name] = default
+        else:
+            data.setdefault(field_name, default)
+
+    @staticmethod
+    def _normalize_id_field(data: Dict[str, Any]) -> None:
+        """Convert integer ID to string for consistency."""
+        if "id" in data and isinstance(data["id"], int):
+            data["id"] = str(data["id"])
+
     @model_validator(mode="before")
     @classmethod
     def ensure_panel_defaults(cls, data: Any) -> Dict[str, Any]:
         """Set defaults for optional fields and normalize data types."""
         if isinstance(data, dict):
             result = dict(data)
+
+            # Set basic defaults
             result.setdefault("description", "")
-            result.setdefault("targets", [])
-            result.setdefault("transformations", [])
-            result.setdefault("fieldConfig", {})
 
-            if "id" in result and isinstance(result["id"], int):
-                result["id"] = str(result["id"])
+            # Ensure complex fields are never None
+            cls._ensure_list_field(result, "targets", [])
+            cls._ensure_list_field(result, "transformations", [])
+            cls._ensure_dict_field(result, "fieldConfig", {})
+
+            # Normalize data types
+            cls._normalize_id_field(result)
 
             return result
         return data
@@ -94,9 +124,9 @@ def extract_panels(
         """Extract panels, including nested ones, skipping invalid panels."""
         panels: List[Panel] = []
         for panel_data in panels_data:
-            if panel_data.get("type") == "row" and "panels" in panel_data:
-                for p in panel_data["panels"]:
-                    if p.get("type") != "row":
+            if panel_data.get("type") == "row" and panel_data.get("panels"):
+                for p in panel_data.get("panels", []):
+                    if p and p.get("type") != "row":
                         if skip_text_panels and p.get("type") == "text":
                             continue
                         try:
@@ -126,6 +156,22 @@ def extract_panels(
                     )
         return panels
 
+    @staticmethod
+    def _set_dashboard_defaults(result: Dict[str, Any]) -> None:
+        """Set default values for optional dashboard fields."""
+        result.setdefault("tags", [])
+        result.setdefault("description", "")
+        result.setdefault("version", None)
+        result.setdefault("timezone", None)
+        result.setdefault("refresh", None)
+        result.setdefault("created_by", None)
+
+    @staticmethod
+    def _cleanup_dashboard_metadata(result: Dict[str, Any]) -> None:
+        """Remove internal metadata fields from dashboard data."""
+        result.pop("meta", None)
+        result.pop("_skip_text_panels", None)
+
     @field_validator("refresh", mode="before")
     @classmethod
     def convert_refresh_to_string(cls, v: Any) -> Optional[str]:
@@ -161,16 +207,8 @@ def extract_dashboard_data(cls, data: Any) -> Dict[str, Any]:
             if folder_id is not None:
                 result["folder_id"] = folder_id
 
-            # Set defaults for optional fields that may be missing from API responses
-            result.setdefault("tags", [])
-            result.setdefault("description", "")
-            result.setdefault("version", None)
-            result.setdefault("timezone", None)
-            result.setdefault("refresh", None)
-            result.setdefault("created_by", None)
-
-            result.pop("meta", None)
-            result.pop("_skip_text_panels", None)
+            cls._set_dashboard_defaults(result)
+            cls._cleanup_dashboard_metadata(result)
 
             return result
 
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
index 60dcee21eb77a6..eacab847060c39 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
@@ -116,3 +116,15 @@ def test_get_fields_from_transformations():
     assert len(fields) == 2
     field_paths = {f.fieldPath for f in fields}
     assert field_paths == {"user", "value"}
+
+
+def test_get_fields_from_field_config_none():
+    """Test that get_fields_from_field_config handles None input gracefully."""
+    fields = get_fields_from_field_config(None)
+    assert fields == []
+
+
+def test_get_fields_from_field_config_empty():
+    """Test that get_fields_from_field_config handles empty dict input."""
+    fields = get_fields_from_field_config({})
+    assert fields == []
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
index 9a5efa8c087a6c..37f689169beb00 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
@@ -84,6 +84,64 @@ def test_panel_completely_minimal():
     assert panel.transformations == []
 
 
+def test_panel_with_null_field_config():
+    """Test panel validation when fieldConfig is explicitly null."""
+    panel_data = {
+        "id": 123,
+        "fieldConfig": None,
+    }
+
+    panel = Panel.model_validate(panel_data)
+    assert panel.id == "123"
+    assert panel.field_config == {}  # Should be converted to empty dict
+
+
+def test_panel_with_null_transformations():
+    """Test panel validation when transformations is explicitly null."""
+    panel_data = {
+        "id": 456,
+        "transformations": None,
+    }
+
+    panel = Panel.model_validate(panel_data)
+    assert panel.id == "456"
+    assert panel.transformations == []  # Should be converted to empty list
+
+
+def test_panel_with_null_targets():
+    """Test panel validation when targets is explicitly null."""
+    panel_data = {
+        "id": 789,
+        "targets": None,
+    }
+
+    panel = Panel.model_validate(panel_data)
+    assert panel.id == "789"
+    assert panel.query_targets == []  # Should be converted to empty list
+
+
+def test_panel_with_all_null_optional_fields():
+    """Test panel validation when all optional fields are null."""
+    panel_data = {
+        "id": 999,
+        "fieldConfig": None,
+        "transformations": None,
+        "targets": None,
+        "datasource": None,
+        "title": None,
+        "type": None,
+    }
+
+    panel = Panel.model_validate(panel_data)
+    assert panel.id == "999"
+    assert panel.field_config == {}
+    assert panel.transformations == []
+    assert panel.query_targets == []
+    assert panel.datasource_ref is None
+    assert panel.title is None
+    assert panel.type is None
+
+
 def test_dashboard_with_incomplete_panels():
     """Test dashboard validation containing panels with missing optional fields."""
     dashboard_data = {
@@ -133,6 +191,33 @@ def test_dashboard_with_incomplete_panels():
     assert panel3.query_targets == []
 
 
+def test_text_panel_like_real_grafana():
+    """Test a text panel structure like those found in real Grafana dashboards."""
+    # This mimics the structure from our integration test dashboard
+    text_panel_data = {
+        "id": 1,
+        "type": "text",
+        "title": "Dashboard Information",
+        "gridPos": {"x": 0, "y": 0, "w": 24, "h": 3},
+        "options": {
+            "content": "# Test Integration Dashboard\nThis dashboard contains test panels.",
+            "mode": "markdown",
+        },
+        # Note: No fieldConfig, targets, transformations, datasource, or description
+    }
+
+    # Should validate successfully with defaults applied
+    panel = Panel.model_validate(text_panel_data)
+    assert panel.id == "1"
+    assert panel.type == "text"
+    assert panel.title == "Dashboard Information"
+    assert panel.description == ""  # Default applied
+    assert panel.field_config == {}  # Default applied
+    assert panel.query_targets == []  # Default applied
+    assert panel.transformations == []  # Default applied
+    assert panel.datasource_ref is None
+
+
 def test_realistic_grafana_api_response():
     """Test validation with a realistic Grafana API response format."""
     # Simulates a typical Grafana API response with some optional fields missing

From 5afa92311e9ed7d31432f402a60a308f712d4e0a Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 18:46:29 +0000
Subject: [PATCH 04/15] defensive against nones and nulls

---
 .../ingestion/source/grafana/field_utils.py   | 67 +++++++++++--------
 .../ingestion/source/grafana/models.py        | 28 ++++++--
 .../unit/grafana/test_grafana_field_utils.py  | 31 +++++++--
 3 files changed, 88 insertions(+), 38 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
index 026a6a4a0d8bb8..ba3a8f5c1f3b93 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
@@ -185,13 +185,15 @@ def _extract_raw_sql_fields_fallback(target: Dict[str, Any]) -> List[SchemaField
             # Clean up any remaining quotes or parentheses
             field_name = field_name.strip("\"'()")
 
-            fields.append(
-                SchemaFieldClass(
-                    fieldPath=field_name,
-                    type=SchemaFieldDataTypeClass(type=StringTypeClass()),
-                    nativeDataType="sql_column",
+            # Only create field if field_name is not empty
+            if field_name:
+                fields.append(
+                    SchemaFieldClass(
+                        fieldPath=field_name,
+                        type=SchemaFieldDataTypeClass(type=StringTypeClass()),
+                        nativeDataType="sql_column",
+                    )
                 )
-            )
 
         return fields
 
@@ -208,13 +210,28 @@ def extract_fields_from_panel(
 ) -> List[SchemaFieldClass]:
     """Extract all fields from a panel."""
     fields = []
-    fields.extend(
-        extract_fields_from_targets(
-            panel.query_targets, panel, connection_to_platform_map, graph, report
+
+    # Extract fields from targets (only if there are targets)
+    if panel.safe_query_targets:
+        target_fields = extract_fields_from_targets(
+            panel.safe_query_targets, panel, connection_to_platform_map, graph, report
+        )
+        if target_fields:
+            fields.extend(target_fields)
+
+    # Extract fields from field config (only if there's meaningful config)
+    if panel.safe_field_config:
+        field_config_fields = get_fields_from_field_config(panel.safe_field_config)
+        if field_config_fields:
+            fields.extend(field_config_fields)
+
+    # Extract fields from transformations (only if there are transformations)
+    if panel.safe_transformations:
+        transformation_fields = get_fields_from_transformations(
+            panel.safe_transformations
         )
-    )
-    fields.extend(get_fields_from_field_config(panel.field_config))
-    fields.extend(get_fields_from_transformations(panel.transformations))
+        if transformation_fields:
+            fields.extend(transformation_fields)
 
     # Track schema field extraction
     if report:
@@ -227,15 +244,13 @@ def extract_fields_from_panel(
 
 
 def extract_fields_from_targets(
-    targets: Optional[List[Dict[str, Any]]],
+    targets: List[Dict[str, Any]],
     panel: Optional[Panel] = None,
     connection_to_platform_map: Optional[Dict[str, Any]] = None,
     graph: Optional[DataHubGraph] = None,
     report: Optional[Any] = None,
 ) -> List[SchemaFieldClass]:
     """Extract fields from panel targets."""
-    if targets is None:
-        return []
 
     fields = []
     for target in targets:
@@ -264,11 +279,9 @@ def extract_time_format_fields(target: Dict[str, Any]) -> List[SchemaFieldClass]
 
 
 def get_fields_from_field_config(
-    field_config: Optional[Dict[str, Any]],
+    field_config: Dict[str, Any],
 ) -> List[SchemaFieldClass]:
     """Extract fields from field configuration."""
-    if field_config is None:
-        return []
 
     fields = []
     defaults = field_config.get("defaults", {})
@@ -296,21 +309,21 @@ def get_fields_from_field_config(
 
 
 def get_fields_from_transformations(
-    transformations: Optional[List[Dict[str, Any]]],
+    transformations: List[Dict[str, Any]],
 ) -> List[SchemaFieldClass]:
     """Extract fields from transformations."""
-    if transformations is None:
-        return []
 
     fields = []
     for transform in transformations:
         if transform.get("type") == "organize":
             for field_name in transform.get("options", {}).get("indexByName", {}):
-                fields.append(
-                    SchemaFieldClass(
-                        fieldPath=field_name,
-                        type=SchemaFieldDataTypeClass(type=StringTypeClass()),
-                        nativeDataType="transformed",
+                # Only create field if field_name is not empty
+                if field_name and field_name.strip():
+                    fields.append(
+                        SchemaFieldClass(
+                            fieldPath=field_name.strip(),
+                            type=SchemaFieldDataTypeClass(type=StringTypeClass()),
+                            nativeDataType="transformed",
+                        )
                     )
-                )
     return fields
diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
index 40242e6067e51f..f7d1b5d387a1da 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
@@ -17,8 +17,9 @@
 
 logger = logging.getLogger(__name__)
 
+# Type aliases for Grafana data structures
 GrafanaQueryTarget = Dict[str, Any]
-GrafanaFieldConfig = Dict[str, Any]
+GrafanaFieldConfig = Dict[str, Any]  # Never None, always a dict (possibly empty)
 GrafanaTransformation = Dict[str, Any]
 
 
@@ -54,12 +55,28 @@ class Panel(_GrafanaBaseModel):
     field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig")
     transformations: List[GrafanaTransformation] = Field(default_factory=list)
 
+    @property
+    def safe_field_config(self) -> GrafanaFieldConfig:
+        """Get field_config, guaranteed to be a dict (never None)."""
+        return self.field_config or {}
+
+    @property
+    def safe_query_targets(self) -> List[GrafanaQueryTarget]:
+        """Get query_targets, guaranteed to be a list (never None)."""
+        return self.query_targets or []
+
+    @property
+    def safe_transformations(self) -> List[GrafanaTransformation]:
+        """Get transformations, guaranteed to be a list (never None)."""
+        return self.transformations or []
+
     @staticmethod
     def _ensure_dict_field(
         data: Dict[str, Any], field_name: str, default: Dict[str, Any]
     ) -> None:
-        """Ensure a field is a dict, converting None to the default dict."""
-        if data.get(field_name) is None:
+        """Ensure a field is a dict, converting None/invalid types to the default dict."""
+        value = data.get(field_name)
+        if value is None or not isinstance(value, dict):
             data[field_name] = default
         else:
             data.setdefault(field_name, default)
@@ -68,8 +85,9 @@ def _ensure_dict_field(
     def _ensure_list_field(
         data: Dict[str, Any], field_name: str, default: List[Any]
     ) -> None:
-        """Ensure a field is a list, converting None to the default list."""
-        if data.get(field_name) is None:
+        """Ensure a field is a list, converting None/invalid types to the default list."""
+        value = data.get(field_name)
+        if value is None or not isinstance(value, list):
             data[field_name] = default
         else:
             data.setdefault(field_name, default)
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
index eacab847060c39..b306bfc06409f6 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
@@ -118,13 +118,32 @@ def test_get_fields_from_transformations():
     assert field_paths == {"user", "value"}
 
 
-def test_get_fields_from_field_config_none():
-    """Test that get_fields_from_field_config handles None input gracefully."""
-    fields = get_fields_from_field_config(None)
-    assert fields == []
-
-
 def test_get_fields_from_field_config_empty():
     """Test that get_fields_from_field_config handles empty dict input."""
     fields = get_fields_from_field_config({})
     assert fields == []
+
+
+def test_extract_fields_from_panel_with_empty_fields():
+    """Test that extract_fields_from_panel handles panels with empty fields efficiently."""
+    from datahub.ingestion.source.grafana.field_utils import extract_fields_from_panel
+    from datahub.ingestion.source.grafana.models import Panel
+
+    # Create a text panel with no targets, fieldConfig, or transformations
+    panel_data = {
+        "id": 1,
+        "type": "text",
+        "title": "Test Panel",
+    }
+
+    panel = Panel.model_validate(panel_data)
+
+    # Verify safe properties work
+    assert panel.safe_field_config == {}
+    assert panel.safe_query_targets == []
+    assert panel.safe_transformations == []
+
+    # Should extract no fields but not crash
+    fields = extract_fields_from_panel(panel)
+    assert isinstance(fields, list)
+    assert len(fields) == 0  # No fields to extract from a text panel

From 50696b69e6fa8616de42f24744e3affa1664e45d Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 19:04:12 +0000
Subject: [PATCH 05/15] cleaning

---
 .../ingestion/source/grafana/field_utils.py   | 15 ++--
 .../ingestion/source/grafana/models.py        |  6 +-
 .../unit/grafana/test_grafana_field_utils.py  | 77 +++++++++++++++++++
 3 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
index ba3a8f5c1f3b93..b885f41b13a256 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
@@ -77,13 +77,15 @@ def extract_raw_sql_fields(
     schema_aware = False
 
     if panel and panel.datasource_ref and connection_to_platform_map:
-        ds_type = getattr(panel.datasource_ref, "type", None) or "unknown"
-        ds_uid = getattr(panel.datasource_ref, "uid", None) or "unknown"
+        ds_type = panel.datasource_ref.type
+        ds_uid = panel.datasource_ref.uid
 
-        # Try to find mapping by datasource UID first, then by type
-        platform_config = connection_to_platform_map.get(
-            ds_uid
-        ) or connection_to_platform_map.get(ds_type)
+        # Try to find mapping by datasource UID first (if it exists), then by type
+        platform_config = None
+        if ds_uid:
+            platform_config = connection_to_platform_map.get(ds_uid)
+        if not platform_config and ds_type:
+            platform_config = connection_to_platform_map.get(ds_type)
 
         if platform_config:
             platform = platform_config.platform
@@ -282,7 +284,6 @@ def get_fields_from_field_config(
     field_config: Dict[str, Any],
 ) -> List[SchemaFieldClass]:
     """Extract fields from field configuration."""
-
     fields = []
     defaults = field_config.get("defaults", {})
     unit = defaults.get("unit")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
index f7d1b5d387a1da..c5be3bfbee958e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
@@ -78,8 +78,7 @@ def _ensure_dict_field(
         value = data.get(field_name)
         if value is None or not isinstance(value, dict):
             data[field_name] = default
-        else:
-            data.setdefault(field_name, default)
+        # If value exists and is already a dict, leave it as is
 
     @staticmethod
     def _ensure_list_field(
@@ -89,8 +88,7 @@ def _ensure_list_field(
         value = data.get(field_name)
         if value is None or not isinstance(value, list):
             data[field_name] = default
-        else:
-            data.setdefault(field_name, default)
+        # If value exists and is already a list, leave it as is
 
     @staticmethod
     def _normalize_id_field(data: Dict[str, Any]) -> None:
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
index b306bfc06409f6..bfdb7a333a820f 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
@@ -147,3 +147,80 @@ def test_extract_fields_from_panel_with_empty_fields():
     fields = extract_fields_from_panel(panel)
     assert isinstance(fields, list)
     assert len(fields) == 0  # No fields to extract from a text panel
+
+
+def test_datasource_ref_field_access():
+    """Test that datasource_ref fields are accessed correctly without getattr."""
+    from datahub.ingestion.source.grafana.models import Panel
+
+    # Test panel with datasource_ref having both type and uid
+    panel_with_datasource = Panel.model_validate(
+        {
+            "id": 1,
+            "type": "graph",
+            "datasource": {"type": "prometheus", "uid": "prometheus-uid-123"},
+        }
+    )
+
+    # Verify we can access fields directly
+    assert panel_with_datasource.datasource_ref is not None
+    assert panel_with_datasource.datasource_ref.type == "prometheus"
+    assert panel_with_datasource.datasource_ref.uid == "prometheus-uid-123"
+
+    # Test panel with partial datasource_ref (only type)
+    panel_with_partial_datasource = Panel.model_validate(
+        {
+            "id": 2,
+            "type": "graph",
+            "datasource": {
+                "type": "mysql"
+                # No uid field
+            },
+        }
+    )
+
+    assert panel_with_partial_datasource.datasource_ref is not None
+    assert panel_with_partial_datasource.datasource_ref.type == "mysql"
+    assert panel_with_partial_datasource.datasource_ref.uid is None
+
+    # Test panel with no datasource_ref
+    panel_without_datasource = Panel.model_validate(
+        {
+            "id": 3,
+            "type": "text",
+            # No datasource field
+        }
+    )
+
+    assert panel_without_datasource.datasource_ref is None
+
+
+def test_extract_raw_sql_fields_with_text_panel():
+    """Test that extract_raw_sql_fields handles text panels (no datasource) correctly."""
+    from datahub.ingestion.source.grafana.field_utils import extract_raw_sql_fields
+    from datahub.ingestion.source.grafana.models import Panel
+
+    # Create a text panel with no datasource (like in real Grafana)
+    text_panel = Panel.model_validate(
+        {
+            "id": 1,
+            "type": "text",
+            "title": "Text Panel",
+            # No datasource field - this is normal for text panels
+        }
+    )
+
+    # Should not crash when processing a target with rawSql but no datasource
+    target_with_sql = {"rawSql": "SELECT * FROM test_table"}
+
+    # This should work without error, even though panel.datasource_ref is None
+    fields = extract_raw_sql_fields(
+        target=target_with_sql,
+        panel=text_panel,
+        connection_to_platform_map={"postgres": "some_config"},
+        graph=None,
+        report=None,
+    )
+
+    # Should return some fields (fallback parsing) or empty list, but not crash
+    assert isinstance(fields, list)

From 97ae68e8d75bd079481fc44031c0caa8b1862558 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 19:17:34 +0000
Subject: [PATCH 06/15] defensive checks

---
 .../ingestion/source/grafana/field_utils.py   | 15 ++++---
 .../ingestion/source/grafana/models.py        | 14 ++++++-
 .../unit/grafana/test_grafana_field_utils.py  |  6 +++
 .../unit/grafana/test_grafana_validation.py   | 39 +++++++++++++++++++
 4 files changed, 66 insertions(+), 8 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
index b885f41b13a256..3b7d318d78bb91 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
@@ -221,11 +221,10 @@ def extract_fields_from_panel(
         if target_fields:
             fields.extend(target_fields)
 
-    # Extract fields from field config (only if there's meaningful config)
-    if panel.safe_field_config:
-        field_config_fields = get_fields_from_field_config(panel.safe_field_config)
-        if field_config_fields:
-            fields.extend(field_config_fields)
+    # Extract fields from field config
+    field_config_fields = get_fields_from_field_config(panel.field_config)
+    if field_config_fields:
+        fields.extend(field_config_fields)
 
     # Extract fields from transformations (only if there are transformations)
     if panel.safe_transformations:
@@ -281,9 +280,12 @@ def extract_time_format_fields(target: Dict[str, Any]) -> List[SchemaFieldClass]
 
 
 def get_fields_from_field_config(
-    field_config: Dict[str, Any],
+    field_config: Optional[Dict[str, Any]],
 ) -> List[SchemaFieldClass]:
     """Extract fields from field configuration."""
+    if field_config is None:
+        return []
+
     fields = []
     defaults = field_config.get("defaults", {})
     unit = defaults.get("unit")
@@ -295,6 +297,7 @@ def get_fields_from_field_config(
                 nativeDataType="value",
             )
         )
+
     for override in field_config.get("overrides", []):
         if override.get("matcher", {}).get("id") == "byName":
             field_name = override.get("matcher", {}).get("options")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
index c5be3bfbee958e..026bc84ba53de2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
@@ -103,8 +103,18 @@ def ensure_panel_defaults(cls, data: Any) -> Dict[str, Any]:
         if isinstance(data, dict):
             result = dict(data)
 
-            # Set basic defaults
-            result.setdefault("description", "")
+            # Set basic defaults - handle None values
+            if result.get("description") is None:
+                result["description"] = ""
+
+            # Handle datasource field - convert invalid types to None
+            datasource = result.get("datasource")
+            if isinstance(datasource, str):
+                # Handle template variables like '$datasource' or other string values
+                result["datasource"] = None
+            elif datasource is not None and not isinstance(datasource, dict):
+                # Handle any other invalid types
+                result["datasource"] = None
 
             # Ensure complex fields are never None
             cls._ensure_list_field(result, "targets", [])
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
index bfdb7a333a820f..5d9774904c9715 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
@@ -124,6 +124,12 @@ def test_get_fields_from_field_config_empty():
     assert fields == []
 
 
+def test_get_fields_from_field_config_none():
+    """Test that get_fields_from_field_config handles None input gracefully."""
+    fields = get_fields_from_field_config(None)
+    assert fields == []
+
+
 def test_extract_fields_from_panel_with_empty_fields():
     """Test that extract_fields_from_panel handles panels with empty fields efficiently."""
     from datahub.ingestion.source.grafana.field_utils import extract_fields_from_panel
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
index 37f689169beb00..9ddcf4b69d0e61 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
@@ -218,6 +218,45 @@ def test_text_panel_like_real_grafana():
     assert panel.datasource_ref is None
 
 
+def test_panel_with_null_description():
+    """Test that panels with explicit None description are handled correctly."""
+    panel_data = {
+        "id": 18,
+        "type": "timeseries",
+        "title": "Test Panel",
+        "description": None,  # Explicit None value from Grafana API
+        "datasource": {"type": "prometheus", "uid": "test-uid"},
+        "fieldConfig": {"defaults": {"unit": "short"}},
+        "targets": [{"expr": "up"}],
+    }
+
+    # Should validate successfully with None converted to empty string
+    panel = Panel.model_validate(panel_data)
+    assert panel.description == ""  # None converted to empty string
+    assert panel.id == "18"
+    assert panel.type == "timeseries"
+    assert panel.title == "Test Panel"
+
+
+def test_panel_with_string_datasource():
+    """Test that panels with string datasource (template variables) are handled correctly."""
+    panel_data = {
+        "id": 35,
+        "type": "timeseries",
+        "title": "Sent records",
+        "datasource": "$datasource",  # String template variable from Grafana
+        "fieldConfig": {"defaults": {"unit": "short"}},
+        "targets": [{"datasource": "$datasource", "expr": "up"}],
+    }
+
+    # Should validate successfully with string datasource converted to None
+    panel = Panel.model_validate(panel_data)
+    assert panel.datasource_ref is None  # String converted to None
+    assert panel.id == "35"
+    assert panel.type == "timeseries"
+    assert panel.title == "Sent records"
+
+
 def test_realistic_grafana_api_response():
     """Test validation with a realistic Grafana API response format."""
     # Simulates a typical Grafana API response with some optional fields missing

From 302b3e31bb4c5336ef06d10887d94d301deca604 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 19:32:50 +0000
Subject: [PATCH 07/15] catering for no panel id

---
 .../ingestion/source/grafana/models.py        | 11 ++++--
 .../tests/unit/grafana/test_grafana_models.py | 14 +++++---
 .../unit/grafana/test_grafana_validation.py   | 35 +++++++++++++++++++
 3 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
index 026bc84ba53de2..fe4da6b35259b0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
@@ -92,8 +92,15 @@ def _ensure_list_field(
 
     @staticmethod
     def _normalize_id_field(data: Dict[str, Any]) -> None:
-        """Convert integer ID to string for consistency."""
-        if "id" in data and isinstance(data["id"], int):
+        """Convert integer ID to string and generate fallback ID if missing."""
+        if "id" not in data or data["id"] is None:
+            # Generate a fallback ID based on panel type and title
+            panel_type = data.get("type", "unknown")
+            title = data.get("title", "untitled")
+            # Create a simple hash-like ID from type and title
+            fallback_id = f"{panel_type}_{hash(title) % 10000}"
+            data["id"] = fallback_id
+        elif isinstance(data["id"], int):
             data["id"] = str(data["id"])
 
     @model_validator(mode="before")
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
index 7e30eaaaa3e859..08d7883ae06952 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py
@@ -130,7 +130,7 @@ def test_dashboard_with_text_panel():
 
 
 def test_dashboard_with_invalid_panel_skips():
-    """Test that invalid panels are skipped with a warning instead of failing."""
+    """Test that panels with missing IDs get generated fallback IDs."""
     dashboard_data = {
         "dashboard": {
             "uid": "dash1",
@@ -139,7 +139,7 @@ def test_dashboard_with_invalid_panel_skips():
             "version": "1",
             "panels": [
                 {"id": "1", "title": "Valid Panel", "type": "graph"},
-                # Missing required 'id' field - should be skipped
+                # Missing 'id' field - should get generated ID
                 {"title": "Invalid Panel", "type": "graph"},
                 {"id": "3", "title": "Another Valid Panel", "type": "table"},
             ],
@@ -148,10 +148,14 @@ def test_dashboard_with_invalid_panel_skips():
     }
 
     dashboard = Dashboard.model_validate(dashboard_data)
-    # Should have 2 panels (invalid one is skipped)
-    assert len(dashboard.panels) == 2
+    # Should have 3 panels (missing ID gets generated)
+    assert len(dashboard.panels) == 3
     assert dashboard.panels[0].title == "Valid Panel"
-    assert dashboard.panels[1].title == "Another Valid Panel"
+    assert dashboard.panels[0].id == "1"
+    assert dashboard.panels[1].title == "Invalid Panel"
+    assert dashboard.panels[1].id.startswith("graph_")  # Generated ID
+    assert dashboard.panels[2].title == "Another Valid Panel"
+    assert dashboard.panels[2].id == "3"
 
 
 def test_dashboard_skip_text_panels():
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
index 9ddcf4b69d0e61..78243ae5ef192c 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
@@ -257,6 +257,41 @@ def test_panel_with_string_datasource():
     assert panel.title == "Sent records"
 
 
+def test_panel_with_missing_id():
+    """Test that panels with missing id field get a generated fallback ID."""
+    panel_data = {
+        # No id field - common in some text panels
+        "type": "text",
+        "title": "Performance Testing (k6)",
+        "options": {"content": "# Performance Testing", "mode": "markdown"},
+        "gridPos": {"h": 4, "w": 24, "x": 0, "y": 0},
+    }
+
+    # Should validate successfully with generated ID
+    panel = Panel.model_validate(panel_data)
+    assert panel.id is not None  # ID was generated
+    assert panel.id.startswith("text_")  # Generated based on type
+    assert panel.type == "text"
+    assert panel.title == "Performance Testing (k6)"
+
+
+def test_panel_with_null_id():
+    """Test that panels with explicit None id field get a generated fallback ID."""
+    panel_data = {
+        "id": None,  # Explicit None
+        "type": "text",
+        "title": "Test Panel",
+        "options": {"content": "Test content"},
+    }
+
+    # Should validate successfully with generated ID
+    panel = Panel.model_validate(panel_data)
+    assert panel.id is not None  # ID was generated
+    assert panel.id.startswith("text_")  # Generated based on type
+    assert panel.type == "text"
+    assert panel.title == "Test Panel"
+
+
 def test_realistic_grafana_api_response():
     """Test validation with a realistic Grafana API response format."""
     # Simulates a typical Grafana API response with some optional fields missing

From e2a35b8e7dacace9465d8aae313c3c1fb3eff6d9 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 19:36:02 +0000
Subject: [PATCH 08/15] deterministic fallback for multiple panels with no id

---
 .../ingestion/source/grafana/models.py        | 26 ++++++++-
 .../unit/grafana/test_grafana_validation.py   | 56 +++++++++++++++++++
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
index fe4da6b35259b0..86a55c6bb437a9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py
@@ -94,11 +94,31 @@ def _ensure_list_field(
     def _normalize_id_field(data: Dict[str, Any]) -> None:
         """Convert integer ID to string and generate fallback ID if missing."""
         if "id" not in data or data["id"] is None:
-            # Generate a fallback ID based on panel type and title
+            # Generate a deterministic fallback ID based on multiple panel properties
             panel_type = data.get("type", "unknown")
             title = data.get("title", "untitled")
-            # Create a simple hash-like ID from type and title
-            fallback_id = f"{panel_type}_{hash(title) % 10000}"
+
+            # Include additional properties for uniqueness
+            grid_pos = data.get("gridPos", {})
+            x = grid_pos.get("x", 0)
+            y = grid_pos.get("y", 0)
+            w = grid_pos.get("w", 0)
+            h = grid_pos.get("h", 0)
+
+            # Create a deterministic identifier from multiple properties
+            # This ensures uniqueness even for panels with identical type/title
+            identifier_parts = [
+                panel_type,
+                title,
+                str(x),
+                str(y),
+                str(w),
+                str(h),  # Grid position for uniqueness
+            ]
+            identifier_string = "_".join(identifier_parts)
+
+            # Use hash for consistent ID generation across runs
+            fallback_id = f"{panel_type}_{abs(hash(identifier_string)) % 100000}"
             data["id"] = fallback_id
         elif isinstance(data["id"], int):
             data["id"] = str(data["id"])
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
index 78243ae5ef192c..ae741e44e93528 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py
@@ -292,6 +292,62 @@ def test_panel_with_null_id():
     assert panel.title == "Test Panel"
 
 
+def test_multiple_panels_without_id_or_title():
+    """Test that multiple panels without ID or title get unique, deterministic IDs."""
+    # Two panels with no ID, no title, but different grid positions
+    panel1_data = {
+        "type": "text",
+        "gridPos": {"x": 0, "y": 0, "w": 12, "h": 4},
+        "options": {"content": "Panel 1"},
+    }
+
+    panel2_data = {
+        "type": "text",
+        "gridPos": {"x": 12, "y": 0, "w": 12, "h": 4},
+        "options": {"content": "Panel 2"},
+    }
+
+    # Validate both panels
+    panel1 = Panel.model_validate(panel1_data)
+    panel2 = Panel.model_validate(panel2_data)
+
+    # Both should have generated IDs
+    assert panel1.id is not None
+    assert panel2.id is not None
+    assert panel1.id.startswith("text_")
+    assert panel2.id.startswith("text_")
+
+    # IDs should be different due to different grid positions
+    assert panel1.id != panel2.id
+
+    # IDs should be deterministic - same input produces same ID
+    panel1_duplicate = Panel.model_validate(panel1_data)
+    panel2_duplicate = Panel.model_validate(panel2_data)
+    assert panel1.id == panel1_duplicate.id
+    assert panel2.id == panel2_duplicate.id
+
+
+def test_panels_identical_except_position():
+    """Test that identical panels in different positions get different IDs."""
+    base_panel_data = {
+        "type": "text",
+        "title": "Same Title",
+        "options": {"content": "Same content"},
+    }
+
+    # Same panel in different positions
+    panel1_data = {**base_panel_data, "gridPos": {"x": 0, "y": 0, "w": 12, "h": 4}}
+    panel2_data = {**base_panel_data, "gridPos": {"x": 0, "y": 4, "w": 12, "h": 4}}
+
+    panel1 = Panel.model_validate(panel1_data)
+    panel2 = Panel.model_validate(panel2_data)
+
+    # Should have different IDs due to different positions
+    assert panel1.id != panel2.id
+    assert panel1.id.startswith("text_")
+    assert panel2.id.startswith("text_")
+
+
 def test_realistic_grafana_api_response():
     """Test validation with a realistic Grafana API response format."""
     # Simulates a typical Grafana API response with some optional fields missing

From afad92b95b11ec0308d34ac6aecb0593fecfab5b Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 19:47:24 +0000
Subject: [PATCH 09/15] guarantees that string config is always populated

---
 .../ingestion/source/grafana/field_utils.py   | 22 ++++++++++++---
 .../unit/grafana/test_grafana_field_utils.py  | 28 +++++++++++++++++--
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
index 3b7d318d78bb91..e4dcf5680aff0c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
@@ -221,8 +221,8 @@ def extract_fields_from_panel(
         if target_fields:
             fields.extend(target_fields)
 
-    # Extract fields from field config
-    field_config_fields = get_fields_from_field_config(panel.field_config)
+    # Extract fields from field config - use safe property to ensure non-None
+    field_config_fields = get_fields_from_field_config(panel.safe_field_config)
     if field_config_fields:
         fields.extend(field_config_fields)
 
@@ -280,10 +280,15 @@ def extract_time_format_fields(target: Dict[str, Any]) -> List[SchemaFieldClass]
 
 
 def get_fields_from_field_config(
-    field_config: Optional[Dict[str, Any]],
+    field_config: Dict[str, Any],
 ) -> List[SchemaFieldClass]:
     """Extract fields from field configuration."""
+    # Ultimate safety check - this should never happen but let's catch it with detailed info
     if field_config is None:
+        logger.error(
+            f"CRITICAL: get_fields_from_field_config received None field_config. "
+            f"This indicates a serious bug. Type: {type(field_config)}, Value: {field_config}"
+        )
         return []
 
     fields = []
@@ -298,7 +303,16 @@ def get_fields_from_field_config(
             )
         )
 
-    for override in field_config.get("overrides", []):
+    # Additional safety check before iterating
+    overrides = field_config.get("overrides", [])
+    if overrides is None:
+        logger.error(
+            f"CRITICAL: field_config.get('overrides', []) returned None. "
+            f"field_config type: {type(field_config)}, field_config: {field_config}"
+        )
+        overrides = []
+
+    for override in overrides:
         if override.get("matcher", {}).get("id") == "byName":
             field_name = override.get("matcher", {}).get("options")
             if field_name:
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
index 5d9774904c9715..faa45fc4bd6a1e 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
@@ -125,11 +125,35 @@ def test_get_fields_from_field_config_empty():
 
 
 def test_get_fields_from_field_config_none():
-    """Test that get_fields_from_field_config handles None input gracefully."""
-    fields = get_fields_from_field_config(None)
+    """Test that get_fields_from_field_config handles empty dict input (via safe property)."""
+    # The safe_field_config property ensures we never pass None to this function
+    fields = get_fields_from_field_config({})
     assert fields == []
 
 
+def test_panel_safe_field_config_property():
+    """Test that Panel.safe_field_config always returns a dict, never None."""
+    from datahub.ingestion.source.grafana.models import Panel
+
+    # Test with explicit None field_config (should be converted by model validator)
+    panel_data = {
+        "id": "test1",
+        "type": "text",
+        "fieldConfig": None,  # This should be converted to {} by the model validator
+    }
+
+    panel = Panel.model_validate(panel_data)
+
+    # The safe_field_config property should always return a dict
+    safe_config = panel.safe_field_config
+    assert isinstance(safe_config, dict)
+    assert safe_config == {}  # Should be empty dict, not None
+
+    # The actual field_config should also be a dict after validation
+    assert isinstance(panel.field_config, dict)
+    assert panel.field_config == {}
+
+
 def test_extract_fields_from_panel_with_empty_fields():
     """Test that extract_fields_from_panel handles panels with empty fields efficiently."""
     from datahub.ingestion.source.grafana.field_utils import extract_fields_from_panel

From 072c7d3d3f0fd16e5b2c2de5d93879ba681e8d81 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 19:58:50 +0000
Subject: [PATCH 10/15] Update field_utils.py

---
 .../ingestion/source/grafana/field_utils.py   | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
index e4dcf5680aff0c..3cb35f4344af5a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
@@ -283,14 +283,6 @@ def get_fields_from_field_config(
     field_config: Dict[str, Any],
 ) -> List[SchemaFieldClass]:
     """Extract fields from field configuration."""
-    # Ultimate safety check - this should never happen but let's catch it with detailed info
-    if field_config is None:
-        logger.error(
-            f"CRITICAL: get_fields_from_field_config received None field_config. "
-            f"This indicates a serious bug. Type: {type(field_config)}, Value: {field_config}"
-        )
-        return []
-
     fields = []
     defaults = field_config.get("defaults", {})
     unit = defaults.get("unit")
@@ -303,16 +295,7 @@ def get_fields_from_field_config(
             )
         )
 
-    # Additional safety check before iterating
-    overrides = field_config.get("overrides", [])
-    if overrides is None:
-        logger.error(
-            f"CRITICAL: field_config.get('overrides', []) returned None. "
-            f"field_config type: {type(field_config)}, field_config: {field_config}"
-        )
-        overrides = []
-
-    for override in overrides:
+    for override in field_config.get("overrides", []):
         if override.get("matcher", {}).get("id") == "byName":
             field_name = override.get("matcher", {}).get("options")
             if field_name:

From 32b3e9dbe0447a8f7fb31c21258407d291ff527e Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Tue, 18 Nov 2025 20:08:39 +0000
Subject: [PATCH 11/15] Update field_utils.py

---
 .../src/datahub/ingestion/source/grafana/field_utils.py        | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
index 3cb35f4344af5a..15c6097b6f3cdd 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
@@ -295,7 +295,8 @@ def get_fields_from_field_config(
             )
         )
 
-    for override in field_config.get("overrides", []):
+    overrides = field_config.get("overrides") or []
+    for override in overrides:
         if override.get("matcher", {}).get("id") == "byName":
             field_name = override.get("matcher", {}).get("options")
             if field_name:

From cfaa0e7d1a31326f00ec607c2a860aafd5db05d0 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Wed, 19 Nov 2025 16:54:37 +0000
Subject: [PATCH 12/15] rawSQL fixes

---
 .../ingestion/source/grafana/field_utils.py   | 18 ++++++++++++---
 .../ingestion/source/grafana/lineage.py       |  6 +++--
 .../unit/grafana/test_grafana_field_utils.py  | 23 +++++++++++++++++++
 3 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
index 15c6097b6f3cdd..5ad5422fc59516 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py
@@ -64,7 +64,13 @@ def extract_raw_sql_fields(
     report: Optional[Any] = None,
 ) -> List[SchemaFieldClass]:
     """Extract fields from raw SQL queries using DataHub's SQL parsing."""
-    raw_sql = target.get("rawSql", "")
+    # Handle case variations: rawSql, rawSQL, etc.
+    raw_sql = ""
+    for key, value in target.items():
+        if key.lower() == "rawsql" and value:
+            raw_sql = value
+            break
+
     if not raw_sql:
         return []
 
@@ -143,7 +149,13 @@ def extract_raw_sql_fields(
 
 def _extract_raw_sql_fields_fallback(target: Dict[str, Any]) -> List[SchemaFieldClass]:
     """Fallback basic SQL parsing for when sqlglot fails."""
-    raw_sql = target.get("rawSql", "").lower()
+    # Handle case variations: rawSql, rawSQL, etc.
+    raw_sql = ""
+    for key, value in target.items():
+        if key.lower() == "rawsql" and value:
+            raw_sql = value
+            break
+
     if not raw_sql:
         return []
 
@@ -200,7 +212,7 @@ def _extract_raw_sql_fields_fallback(target: Dict[str, Any]) -> List[SchemaField
         return fields
 
     except (IndexError, ValueError, StopIteration) as e:
-        logger.warning(f"Failed to parse SQL: {target.get('rawSql')}", e)
+        logger.warning(f"Failed to parse SQL: {raw_sql}", e)
         return []
 
 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py
index 57bca941690d19..ace1ed4f93e74f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py
@@ -89,8 +89,10 @@ def _extract_raw_sql(
     ) -> Optional[str]:
         """Extract raw SQL from panel query targets."""
         for target in query_targets:
-            if target.get("rawSql"):
-                return target["rawSql"]
+            # Handle case variations: rawSql, rawSQL, etc.
+            for key, value in target.items():
+                if key.lower() == "rawsql" and value:
+                    return value
         return None
 
     def _build_dataset_urn(self, ds_type: str, ds_uid: str, panel_id: str) -> str:
diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
index faa45fc4bd6a1e..48fb7dcf4ef953 100644
--- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
+++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py
@@ -67,6 +67,29 @@ def test_extract_raw_sql_fields():
     assert fields[1].fieldPath == "request_count"
 
 
+def test_extract_raw_sql_fields_case_insensitive():
+    """Test that extract_raw_sql_fields handles different case variations of rawSql."""
+    sql_query = "SELECT name as user_name, count as request_count FROM requests"
+
+    # Test different case variations
+    target_rawSql = {"rawSql": sql_query}
+    target_rawSQL = {"rawSQL": sql_query}
+    target_mixed = {"RawSQL": sql_query}
+
+    fields1 = extract_raw_sql_fields(target_rawSql)
+    fields2 = extract_raw_sql_fields(target_rawSQL)
+    fields3 = extract_raw_sql_fields(target_mixed)
+
+    # All should extract the same fields
+    assert len(fields1) == 2
+    assert len(fields2) == 2
+    assert len(fields3) == 2
+
+    # Field names should be the same
+    assert fields1[0].fieldPath == fields2[0].fieldPath == fields3[0].fieldPath
+    assert fields1[1].fieldPath == fields2[1].fieldPath == fields3[1].fieldPath
+
+
 def test_extract_raw_sql_fields_invalid(caplog):
     # Test with completely invalid SQL
     target = {"rawSql": "INVALID SQL"}

From d6642268023b2033511e105ec6ff7a5b3bd5d3a9 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Thu, 20 Nov 2025 19:04:00 +0000
Subject: [PATCH 13/15] Update grafana_config.py

---
 .../src/datahub/ingestion/source/grafana/grafana_config.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py
index b897e25daf0a8a..ab576985f2cc86 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py
@@ -8,6 +8,9 @@
     EnvConfigMixin,
     PlatformInstanceConfigMixin,
 )
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StatefulStaleMetadataRemovalConfig,
+)
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionConfigBase,
 )
@@ -104,6 +107,10 @@ class GrafanaSourceConfig(
         description="Map of Grafana datasource types/UIDs to platform connection configs for lineage extraction",
     )
 
+    stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
+        default=None, description="Stateful ingestion configuration"
+    )
+
     @field_validator("url", mode="after")
     @classmethod
     def remove_trailing_slash(cls, v: str) -> str:

From d8be1667dbc6dd05add87040343402fe51196ae7 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Thu, 20 Nov 2025 21:01:57 +0000
Subject: [PATCH 14/15] Update lineage.py

---
 .../ingestion/source/grafana/lineage.py       | 41 ++++++++++---------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py
index ace1ed4f93e74f..f66e74804a2e6d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py
@@ -166,28 +166,31 @@ def _create_column_lineage(
         dataset_urn: str,
         parsed_sql: SqlParsingResult,
     ) -> Optional[MetadataChangeProposalWrapper]:
-        """Create column-level lineage"""
-        if not parsed_sql.column_lineage or not self.include_column_lineage:
+        """Create column-level lineage and dataset-level lineage from parsed SQL"""
+        # Always create dataset-level lineage if we have upstream tables
+        if not parsed_sql.in_tables:
             return None
 
         upstream_lineages = []
-        for col_lineage in parsed_sql.column_lineage:
-            upstream_lineages.append(
-                FineGrainedLineageClass(
-                    downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
-                    downstreams=[
-                        make_schema_field_urn(
-                            dataset_urn, col_lineage.downstream.column
-                        )
-                    ],
-                    upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
-                    upstreams=[
-                        make_schema_field_urn(upstream_dataset, col.column)
-                        for col in col_lineage.upstreams
-                        for upstream_dataset in parsed_sql.in_tables
-                    ],
+        # Add column-level lineage if available and enabled
+        if parsed_sql.column_lineage and self.include_column_lineage:
+            for col_lineage in parsed_sql.column_lineage:
+                upstream_lineages.append(
+                    FineGrainedLineageClass(
+                        downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
+                        downstreams=[
+                            make_schema_field_urn(
+                                dataset_urn, col_lineage.downstream.column
+                            )
+                        ],
+                        upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
+                        upstreams=[
+                            make_schema_field_urn(upstream_dataset, col.column)
+                            for col in col_lineage.upstreams
+                            for upstream_dataset in parsed_sql.in_tables
+                        ],
+                    )
                 )
-            )
 
         return MetadataChangeProposalWrapper(
             entityUrn=dataset_urn,
@@ -199,6 +202,6 @@ def _create_column_lineage(
                     )
                     for table in parsed_sql.in_tables
                 ],
-                fineGrainedLineages=upstream_lineages,
+                fineGrainedLineages=upstream_lineages if upstream_lineages else None,
             ),
         )

From 475e8a51b6e4908d3badfc006eeb5e162273bc84 Mon Sep 17 00:00:00 2001
From: Jonny Dixon <jonny.dixon@acryl.io>
Date: Fri, 21 Nov 2025 18:01:41 +0000
Subject: [PATCH 15/15] fixing snapshot lineage overwrite

---
 .../source/grafana/grafana_source.py          |   90 +-
 .../grafana/grafana_basic_mcps_golden.json    |  760 ++++++------
 .../grafana/grafana_mcps_golden.json          | 1014 ++++++++++-------
 3 files changed, 1078 insertions(+), 786 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py
index 99cb796de7233e..23a779aede623c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py
@@ -61,11 +61,9 @@
     DashboardInfoClass,
     DataPlatformInstanceClass,
     DatasetPropertiesClass,
-    DatasetSnapshotClass,
     GlobalTagsClass,
     InputFieldClass,
     InputFieldsClass,
-    MetadataChangeEventClass,
     OtherSchemaClass,
     SchemaFieldClass,
     SchemaMetadataClass,
@@ -497,47 +495,58 @@ def _process_panel_dataset(
             env=self.env,
         )
 
-        # Create dataset snapshot
-        dataset_snapshot = DatasetSnapshotClass(
-            urn=dataset_urn,
-            aspects=[
-                DataPlatformInstanceClass(
-                    platform=make_data_platform_urn(self.platform),
-                    instance=make_dataplatform_instance_urn(
-                        platform=self.platform,
-                        instance=self.platform_instance,
-                    )
-                    if self.platform_instance
-                    else None,
-                ),
-                DatasetPropertiesClass(
-                    name=f"{ds_uid} ({panel.title or panel.id})",
-                    description="",
-                    customProperties={
-                        "type": ds_type,
-                        "uid": ds_uid,
-                        "full_path": dataset_name,
-                    },
-                ),
-                StatusClass(removed=False),
-            ],
-        )
+        # Platform instance aspect
+        yield MetadataChangeProposalWrapper(
+            entityUrn=dataset_urn,
+            aspect=DataPlatformInstanceClass(
+                platform=make_data_platform_urn(self.platform),
+                instance=make_dataplatform_instance_urn(
+                    platform=self.platform,
+                    instance=self.platform_instance,
+                )
+                if self.platform_instance
+                else None,
+            ),
+        ).as_workunit()
+
+        # Dataset properties aspect
+        yield MetadataChangeProposalWrapper(
+            entityUrn=dataset_urn,
+            aspect=DatasetPropertiesClass(
+                name=f"{ds_uid} ({panel.title or panel.id})",
+                description="",
+                customProperties={
+                    "type": ds_type,
+                    "uid": ds_uid,
+                    "full_path": dataset_name,
+                },
+            ),
+        ).as_workunit()
+
+        # Status aspect
+        yield MetadataChangeProposalWrapper(
+            entityUrn=dataset_urn,
+            aspect=StatusClass(removed=False),
+        ).as_workunit()
 
         # Add schema metadata if available
         schema_fields = extract_fields_from_panel(
             panel, self.config.connection_to_platform_map, self.ctx.graph, self.report
         )
         if schema_fields:
-            schema_metadata = SchemaMetadataClass(
-                schemaName=f"{ds_type}.{ds_uid}.{panel.id}",
-                platform=make_data_platform_urn(self.platform),
-                version=0,
-                fields=schema_fields,
-                hash="",
-                platformSchema=OtherSchemaClass(rawSchema=""),
-            )
-            dataset_snapshot.aspects.append(schema_metadata)
+            yield MetadataChangeProposalWrapper(
+                entityUrn=dataset_urn,
+                aspect=SchemaMetadataClass(
+                    schemaName=f"{ds_type}.{ds_uid}.{panel.id}",
+                    platform=make_data_platform_urn(self.platform),
+                    version=0,
+                    fields=schema_fields,
+                    hash="",
+                    platformSchema=OtherSchemaClass(rawSchema=""),
+                ),
+            ).as_workunit()
 
+        # Add tags if available
         if dashboard_uid and self.config.ingest_tags:
             dashboard = self.api_client.get_dashboard(dashboard_uid)
             if dashboard and dashboard.tags:
@@ -546,13 +555,12 @@ def _process_panel_dataset(
                     tags.append(TagAssociationClass(tag=make_tag_urn(tag)))
 
                 if tags:
-                    dataset_snapshot.aspects.append(GlobalTagsClass(tags=tags))
+                    yield MetadataChangeProposalWrapper(
+                        entityUrn=dataset_urn,
+                        aspect=GlobalTagsClass(tags=tags),
+                    ).as_workunit()
 
         self.report.report_dataset_scanned()
-        yield MetadataWorkUnit(
-            id=f"grafana-dataset-{ds_uid}-{panel.id}",
-            mce=MetadataChangeEventClass(proposedSnapshot=dataset_snapshot),
-        )
 
         # Add dataset to dashboard container
         if dashboard_uid:
diff --git a/metadata-ingestion/tests/integration/grafana/grafana_basic_mcps_golden.json b/metadata-ingestion/tests/integration/grafana/grafana_basic_mcps_golden.json
index afe2386d391a14..572bdfb1f721a3 100644
--- a/metadata-ingestion/tests/integration/grafana/grafana_basic_mcps_golden.json
+++ b/metadata-ingestion/tests/integration/grafana/grafana_basic_mcps_golden.json
@@ -188,102 +188,132 @@
     }
 },
 {
-    "proposedSnapshot": {
-        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-            "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)",
-            "aspects": [
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:grafana"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "type": "postgres",
+                "uid": "test-postgres",
+                "full_path": "postgres.test-postgres.2"
+            },
+            "name": "test-postgres (Response Times by Dimension)",
+            "description": "",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "schemaMetadata",
+    "aspect": {
+        "json": {
+            "schemaName": "postgres.test-postgres.2",
+            "platform": "urn:li:dataPlatform:grafana",
+            "version": 0,
+            "created": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "hash": "",
+            "platformSchema": {
+                "com.linkedin.schema.OtherSchema": {
+                    "rawSchema": ""
+                }
+            },
+            "fields": [
                 {
-                    "com.linkedin.pegasus2avro.common.DataPlatformInstance": {
-                        "platform": "urn:li:dataPlatform:grafana"
-                    }
+                    "fieldPath": "value",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "value",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                        "customProperties": {
-                            "type": "postgres",
-                            "uid": "test-postgres",
-                            "full_path": "postgres.test-postgres.2"
-                        },
-                        "name": "test-postgres (Response Times by Dimension)",
-                        "description": "",
-                        "tags": []
-                    }
+                    "fieldPath": "time",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "sql_column",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.common.Status": {
-                        "removed": false
-                    }
+                    "fieldPath": "dimension",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "sql_column",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                        "schemaName": "postgres.test-postgres.2",
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "version": 0,
-                        "created": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "lastModified": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "hash": "",
-                        "platformSchema": {
-                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
-                                "rawSchema": ""
-                            }
-                        },
-                        "fields": [
-                            {
-                                "fieldPath": "value",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "value",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "time",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "sql_column",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "dimension",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "sql_column",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "value_ms",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "value",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            }
-                        ]
-                    }
+                    "fieldPath": "value_ms",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "value",
+                    "recursive": false,
+                    "isPartOfKey": false
                 }
             ]
         }
@@ -375,7 +405,7 @@
                 "datasourceType": "postgres",
                 "datasourceUid": "test-postgres",
                 "description": "Response times tracked across different dimensions",
-                "queryCount": "1"
+                "targetsCount": "1"
             },
             "title": "Response Times by Dimension",
             "description": "Response times tracked across different dimensions",
@@ -549,102 +579,132 @@
     }
 },
 {
-    "proposedSnapshot": {
-        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-            "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)",
-            "aspects": [
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:grafana"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "type": "postgres",
+                "uid": "test-postgres",
+                "full_path": "postgres.test-postgres.4"
+            },
+            "name": "test-postgres (Recent Metrics)",
+            "description": "",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "schemaMetadata",
+    "aspect": {
+        "json": {
+            "schemaName": "postgres.test-postgres.4",
+            "platform": "urn:li:dataPlatform:grafana",
+            "version": 0,
+            "created": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "hash": "",
+            "platformSchema": {
+                "com.linkedin.schema.OtherSchema": {
+                    "rawSchema": ""
+                }
+            },
+            "fields": [
                 {
-                    "com.linkedin.pegasus2avro.common.DataPlatformInstance": {
-                        "platform": "urn:li:dataPlatform:grafana"
-                    }
+                    "fieldPath": "metric",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "string",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                        "customProperties": {
-                            "type": "postgres",
-                            "uid": "test-postgres",
-                            "full_path": "postgres.test-postgres.4"
-                        },
-                        "name": "test-postgres (Recent Metrics)",
-                        "description": "",
-                        "tags": []
-                    }
+                    "fieldPath": "value",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "number",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.common.Status": {
-                        "removed": false
-                    }
+                    "fieldPath": "dimension",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "string",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                        "schemaName": "postgres.test-postgres.4",
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "version": 0,
-                        "created": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "lastModified": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "hash": "",
-                        "platformSchema": {
-                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
-                                "rawSchema": ""
-                            }
-                        },
-                        "fields": [
-                            {
-                                "fieldPath": "metric",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "string",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "value",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "number",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "dimension",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "string",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "time",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "sql_column",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            }
-                        ]
-                    }
+                    "fieldPath": "time",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "sql_column",
+                    "recursive": false,
+                    "isPartOfKey": false
                 }
             ]
         }
@@ -736,7 +796,7 @@
                 "datasourceType": "postgres",
                 "datasourceUid": "test-postgres",
                 "description": "Recent metrics from all sources",
-                "queryCount": "1"
+                "targetsCount": "1"
             },
             "title": "Recent Metrics",
             "description": "Recent metrics from all sources",
@@ -955,78 +1015,108 @@
     }
 },
 {
-    "proposedSnapshot": {
-        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-            "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)",
-            "aspects": [
-                {
-                    "com.linkedin.pegasus2avro.common.DataPlatformInstance": {
-                        "platform": "urn:li:dataPlatform:grafana"
-                    }
-                },
-                {
-                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                        "customProperties": {
-                            "type": "postgres",
-                            "uid": "test-postgres",
-                            "full_path": "postgres.test-postgres.5"
-                        },
-                        "name": "test-postgres (Total Metrics Count)",
-                        "description": "",
-                        "tags": []
-                    }
-                },
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:grafana"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "type": "postgres",
+                "uid": "test-postgres",
+                "full_path": "postgres.test-postgres.5"
+            },
+            "name": "test-postgres (Total Metrics Count)",
+            "description": "",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "schemaMetadata",
+    "aspect": {
+        "json": {
+            "schemaName": "postgres.test-postgres.5",
+            "platform": "urn:li:dataPlatform:grafana",
+            "version": 0,
+            "created": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "hash": "",
+            "platformSchema": {
+                "com.linkedin.schema.OtherSchema": {
+                    "rawSchema": ""
+                }
+            },
+            "fields": [
                 {
-                    "com.linkedin.pegasus2avro.common.Status": {
-                        "removed": false
-                    }
+                    "fieldPath": "count",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "number",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                        "schemaName": "postgres.test-postgres.5",
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "version": 0,
-                        "created": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "lastModified": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "hash": "",
-                        "platformSchema": {
-                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
-                                "rawSchema": ""
-                            }
-                        },
-                        "fields": [
-                            {
-                                "fieldPath": "count",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "number",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "time",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.TimeType": {}
-                                    }
-                                },
-                                "nativeDataType": "timestamp",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            }
-                        ]
-                    }
+                    "fieldPath": "time",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.TimeType": {}
+                        }
+                    },
+                    "nativeDataType": "timestamp",
+                    "recursive": false,
+                    "isPartOfKey": false
                 }
             ]
         }
@@ -1118,7 +1208,7 @@
                 "datasourceType": "postgres",
                 "datasourceUid": "test-postgres",
                 "description": "Total number of metrics collected",
-                "queryCount": "1"
+                "targetsCount": "1"
             },
             "title": "Total Metrics Count",
             "description": "Total number of metrics collected",
@@ -1247,78 +1337,108 @@
     }
 },
 {
-    "proposedSnapshot": {
-        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-            "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)",
-            "aspects": [
-                {
-                    "com.linkedin.pegasus2avro.common.DataPlatformInstance": {
-                        "platform": "urn:li:dataPlatform:grafana"
-                    }
-                },
-                {
-                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                        "customProperties": {
-                            "type": "prometheus",
-                            "uid": "test-prometheus",
-                            "full_path": "prometheus.test-prometheus.6"
-                        },
-                        "name": "test-prometheus (System Metrics)",
-                        "description": "",
-                        "tags": []
-                    }
-                },
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:grafana"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "type": "prometheus",
+                "uid": "test-prometheus",
+                "full_path": "prometheus.test-prometheus.6"
+            },
+            "name": "test-prometheus (System Metrics)",
+            "description": "",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "schemaMetadata",
+    "aspect": {
+        "json": {
+            "schemaName": "prometheus.test-prometheus.6",
+            "platform": "urn:li:dataPlatform:grafana",
+            "version": 0,
+            "created": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "hash": "",
+            "platformSchema": {
+                "com.linkedin.schema.OtherSchema": {
+                    "rawSchema": ""
+                }
+            },
+            "fields": [
                 {
-                    "com.linkedin.pegasus2avro.common.Status": {
-                        "removed": false
-                    }
+                    "fieldPath": "CPU Usage",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "prometheus_metric",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                        "schemaName": "prometheus.test-prometheus.6",
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "version": 0,
-                        "created": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "lastModified": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "hash": "",
-                        "platformSchema": {
-                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
-                                "rawSchema": ""
-                            }
-                        },
-                        "fields": [
-                            {
-                                "fieldPath": "CPU Usage",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "prometheus_metric",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "Memory Usage",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "prometheus_metric",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            }
-                        ]
-                    }
+                    "fieldPath": "Memory Usage",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "prometheus_metric",
+                    "recursive": false,
+                    "isPartOfKey": false
                 }
             ]
         }
@@ -1410,7 +1530,7 @@
                 "datasourceType": "prometheus",
                 "datasourceUid": "test-prometheus",
                 "description": "Prometheus system metrics",
-                "queryCount": "2"
+                "targetsCount": "2"
             },
             "title": "System Metrics",
             "description": "Prometheus system metrics",
diff --git a/metadata-ingestion/tests/integration/grafana/grafana_mcps_golden.json b/metadata-ingestion/tests/integration/grafana/grafana_mcps_golden.json
index ce1d98601c8043..7145f4d325da95 100644
--- a/metadata-ingestion/tests/integration/grafana/grafana_mcps_golden.json
+++ b/metadata-ingestion/tests/integration/grafana/grafana_mcps_golden.json
@@ -223,115 +223,133 @@
     }
 },
 {
-    "proposedSnapshot": {
-        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-            "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)",
-            "aspects": [
-                {
-                    "com.linkedin.pegasus2avro.common.DataPlatformInstance": {
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)"
-                    }
-                },
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:grafana",
+            "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "type": "postgres",
+                "uid": "test-postgres",
+                "full_path": "postgres.test-postgres.2"
+            },
+            "name": "test-postgres (Response Times by Dimension)",
+            "description": "",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "schemaMetadata",
+    "aspect": {
+        "json": {
+            "schemaName": "postgres.test-postgres.2",
+            "platform": "urn:li:dataPlatform:grafana",
+            "version": 0,
+            "created": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "hash": "",
+            "platformSchema": {
+                "com.linkedin.schema.OtherSchema": {
+                    "rawSchema": ""
+                }
+            },
+            "fields": [
                 {
-                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                        "customProperties": {
-                            "type": "postgres",
-                            "uid": "test-postgres",
-                            "full_path": "postgres.test-postgres.2"
-                        },
-                        "name": "test-postgres (Response Times by Dimension)",
-                        "description": "",
-                        "tags": []
-                    }
+                    "fieldPath": "value",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "value",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.common.Status": {
-                        "removed": false
-                    }
+                    "fieldPath": "time",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NullType": {}
+                        }
+                    },
+                    "nativeDataType": "",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                        "schemaName": "postgres.test-postgres.2",
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "version": 0,
-                        "created": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "lastModified": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "hash": "",
-                        "platformSchema": {
-                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
-                                "rawSchema": ""
-                            }
-                        },
-                        "fields": [
-                            {
-                                "fieldPath": "value",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "value",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "time",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NullType": {}
-                                    }
-                                },
-                                "nativeDataType": "",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "dimension",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NullType": {}
-                                    }
-                                },
-                                "nativeDataType": "",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "value_ms",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "value",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            }
-                        ]
-                    }
+                    "fieldPath": "dimension",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NullType": {}
+                        }
+                    },
+                    "nativeDataType": "",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.common.GlobalTags": {
-                        "tags": [
-                            {
-                                "tag": "urn:li:tag:test-tag"
-                            },
-                            {
-                                "tag": "urn:li:tag:integration-test"
-                            }
-                        ]
-                    }
+                    "fieldPath": "value_ms",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "value",
+                    "recursive": false,
+                    "isPartOfKey": false
                 }
             ]
         }
@@ -346,10 +364,17 @@
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)",
     "changeType": "UPSERT",
-    "aspectName": "container",
+    "aspectName": "globalTags",
     "aspect": {
         "json": {
-            "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810"
+            "tags": [
+                {
+                    "tag": "urn:li:tag:test-tag"
+                },
+                {
+                    "tag": "urn:li:tag:integration-test"
+                }
+            ]
         }
     },
     "systemMetadata": {
@@ -362,19 +387,10 @@
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)",
     "changeType": "UPSERT",
-    "aspectName": "upstreamLineage",
+    "aspectName": "container",
     "aspect": {
         "json": {
-            "upstreams": [
-                {
-                    "auditStamp": {
-                        "time": 0,
-                        "actor": "urn:li:corpuser:unknown"
-                    },
-                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)",
-                    "type": "TRANSFORMED"
-                }
-            ]
+            "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810"
         }
     },
     "systemMetadata": {
@@ -453,7 +469,7 @@
                 "datasourceType": "postgres",
                 "datasourceUid": "test-postgres",
                 "description": "Response times tracked across different dimensions",
-                "queryCount": "1"
+                "targetsCount": "1"
             },
             "title": "Response Times by Dimension",
             "description": "Response times tracked across different dimensions",
@@ -654,115 +670,133 @@
     }
 },
 {
-    "proposedSnapshot": {
-        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-            "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)",
-            "aspects": [
-                {
-                    "com.linkedin.pegasus2avro.common.DataPlatformInstance": {
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)"
-                    }
-                },
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:grafana",
+            "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "type": "postgres",
+                "uid": "test-postgres",
+                "full_path": "postgres.test-postgres.4"
+            },
+            "name": "test-postgres (Recent Metrics)",
+            "description": "",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "schemaMetadata",
+    "aspect": {
+        "json": {
+            "schemaName": "postgres.test-postgres.4",
+            "platform": "urn:li:dataPlatform:grafana",
+            "version": 0,
+            "created": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "hash": "",
+            "platformSchema": {
+                "com.linkedin.schema.OtherSchema": {
+                    "rawSchema": ""
+                }
+            },
+            "fields": [
                 {
-                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                        "customProperties": {
-                            "type": "postgres",
-                            "uid": "test-postgres",
-                            "full_path": "postgres.test-postgres.4"
-                        },
-                        "name": "test-postgres (Recent Metrics)",
-                        "description": "",
-                        "tags": []
-                    }
+                    "fieldPath": "metric",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "string",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.common.Status": {
-                        "removed": false
-                    }
+                    "fieldPath": "value",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "number",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                        "schemaName": "postgres.test-postgres.4",
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "version": 0,
-                        "created": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "lastModified": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "hash": "",
-                        "platformSchema": {
-                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
-                                "rawSchema": ""
-                            }
-                        },
-                        "fields": [
-                            {
-                                "fieldPath": "metric",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "string",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "value",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "number",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "dimension",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.StringType": {}
-                                    }
-                                },
-                                "nativeDataType": "string",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "time",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NullType": {}
-                                    }
-                                },
-                                "nativeDataType": "",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            }
-                        ]
-                    }
+                    "fieldPath": "dimension",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.StringType": {}
+                        }
+                    },
+                    "nativeDataType": "string",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.common.GlobalTags": {
-                        "tags": [
-                            {
-                                "tag": "urn:li:tag:test-tag"
-                            },
-                            {
-                                "tag": "urn:li:tag:integration-test"
-                            }
-                        ]
-                    }
+                    "fieldPath": "time",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NullType": {}
+                        }
+                    },
+                    "nativeDataType": "",
+                    "recursive": false,
+                    "isPartOfKey": false
                 }
             ]
         }
@@ -777,10 +811,17 @@
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)",
     "changeType": "UPSERT",
-    "aspectName": "container",
+    "aspectName": "globalTags",
     "aspect": {
         "json": {
-            "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810"
+            "tags": [
+                {
+                    "tag": "urn:li:tag:test-tag"
+                },
+                {
+                    "tag": "urn:li:tag:integration-test"
+                }
+            ]
         }
     },
     "systemMetadata": {
@@ -793,19 +834,10 @@
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)",
     "changeType": "UPSERT",
-    "aspectName": "upstreamLineage",
+    "aspectName": "container",
     "aspect": {
         "json": {
-            "upstreams": [
-                {
-                    "auditStamp": {
-                        "time": 0,
-                        "actor": "urn:li:corpuser:unknown"
-                    },
-                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)",
-                    "type": "TRANSFORMED"
-                }
-            ]
+            "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810"
         }
     },
     "systemMetadata": {
@@ -884,7 +916,7 @@
                 "datasourceType": "postgres",
                 "datasourceUid": "test-postgres",
                 "description": "Recent metrics from all sources",
-                "queryCount": "1"
+                "targetsCount": "1"
             },
             "title": "Recent Metrics",
             "description": "Recent metrics from all sources",
@@ -1130,93 +1162,14 @@
     }
 },
 {
-    "proposedSnapshot": {
-        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-            "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
-            "aspects": [
-                {
-                    "com.linkedin.pegasus2avro.common.DataPlatformInstance": {
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)"
-                    }
-                },
-                {
-                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                        "customProperties": {
-                            "type": "postgres",
-                            "uid": "test-postgres",
-                            "full_path": "postgres.test-postgres.5"
-                        },
-                        "name": "test-postgres (Total Metrics Count)",
-                        "description": "",
-                        "tags": []
-                    }
-                },
-                {
-                    "com.linkedin.pegasus2avro.common.Status": {
-                        "removed": false
-                    }
-                },
-                {
-                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                        "schemaName": "postgres.test-postgres.5",
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "version": 0,
-                        "created": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "lastModified": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "hash": "",
-                        "platformSchema": {
-                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
-                                "rawSchema": ""
-                            }
-                        },
-                        "fields": [
-                            {
-                                "fieldPath": "count",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "number",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "time",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.TimeType": {}
-                                    }
-                                },
-                                "nativeDataType": "timestamp",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            }
-                        ]
-                    }
-                },
-                {
-                    "com.linkedin.pegasus2avro.common.GlobalTags": {
-                        "tags": [
-                            {
-                                "tag": "urn:li:tag:test-tag"
-                            },
-                            {
-                                "tag": "urn:li:tag:integration-test"
-                            }
-                        ]
-                    }
-                }
-            ]
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:grafana",
+            "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)"
         }
     },
     "systemMetadata": {
@@ -1229,10 +1182,17 @@
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
     "changeType": "UPSERT",
-    "aspectName": "container",
+    "aspectName": "datasetProperties",
     "aspect": {
         "json": {
-            "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810"
+            "customProperties": {
+                "type": "postgres",
+                "uid": "test-postgres",
+                "full_path": "postgres.test-postgres.5"
+            },
+            "name": "test-postgres (Total Metrics Count)",
+            "description": "",
+            "tags": []
         }
     },
     "systemMetadata": {
@@ -1245,17 +1205,66 @@
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
     "changeType": "UPSERT",
-    "aspectName": "upstreamLineage",
+    "aspectName": "status",
     "aspect": {
         "json": {
-            "upstreams": [
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "schemaMetadata",
+    "aspect": {
+        "json": {
+            "schemaName": "postgres.test-postgres.5",
+            "platform": "urn:li:dataPlatform:grafana",
+            "version": 0,
+            "created": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "hash": "",
+            "platformSchema": {
+                "com.linkedin.schema.OtherSchema": {
+                    "rawSchema": ""
+                }
+            },
+            "fields": [
                 {
-                    "auditStamp": {
-                        "time": 0,
-                        "actor": "urn:li:corpuser:unknown"
+                    "fieldPath": "count",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
                     },
-                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)",
-                    "type": "TRANSFORMED"
+                    "nativeDataType": "number",
+                    "recursive": false,
+                    "isPartOfKey": false
+                },
+                {
+                    "fieldPath": "time",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.TimeType": {}
+                        }
+                    },
+                    "nativeDataType": "timestamp",
+                    "recursive": false,
+                    "isPartOfKey": false
                 }
             ]
         }
@@ -1266,6 +1275,45 @@
         "lastRunId": "no-run-id-provided"
     }
 },
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "globalTags",
+    "aspect": {
+        "json": {
+            "tags": [
+                {
+                    "tag": "urn:li:tag:test-tag"
+                },
+                {
+                    "tag": "urn:li:tag:integration-test"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "container",
+    "aspect": {
+        "json": {
+            "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
 {
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
@@ -1336,7 +1384,7 @@
                 "datasourceType": "postgres",
                 "datasourceUid": "test-postgres",
                 "description": "Total number of metrics collected",
-                "queryCount": "1"
+                "targetsCount": "1"
             },
             "title": "Total Metrics Count",
             "description": "Total number of metrics collected",
@@ -1492,91 +1540,109 @@
     }
 },
 {
-    "proposedSnapshot": {
-        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
-            "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)",
-            "aspects": [
-                {
-                    "com.linkedin.pegasus2avro.common.DataPlatformInstance": {
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)"
-                    }
-                },
-                {
-                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
-                        "customProperties": {
-                            "type": "prometheus",
-                            "uid": "test-prometheus",
-                            "full_path": "prometheus.test-prometheus.6"
-                        },
-                        "name": "test-prometheus (System Metrics)",
-                        "description": "",
-                        "tags": []
-                    }
-                },
-                {
-                    "com.linkedin.pegasus2avro.common.Status": {
-                        "removed": false
-                    }
-                },
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:grafana",
+            "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "datasetProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "type": "prometheus",
+                "uid": "test-prometheus",
+                "full_path": "prometheus.test-prometheus.6"
+            },
+            "name": "test-prometheus (System Metrics)",
+            "description": "",
+            "tags": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "schemaMetadata",
+    "aspect": {
+        "json": {
+            "schemaName": "prometheus.test-prometheus.6",
+            "platform": "urn:li:dataPlatform:grafana",
+            "version": 0,
+            "created": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "lastModified": {
+                "time": 0,
+                "actor": "urn:li:corpuser:unknown"
+            },
+            "hash": "",
+            "platformSchema": {
+                "com.linkedin.schema.OtherSchema": {
+                    "rawSchema": ""
+                }
+            },
+            "fields": [
                 {
-                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
-                        "schemaName": "prometheus.test-prometheus.6",
-                        "platform": "urn:li:dataPlatform:grafana",
-                        "version": 0,
-                        "created": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "lastModified": {
-                            "time": 0,
-                            "actor": "urn:li:corpuser:unknown"
-                        },
-                        "hash": "",
-                        "platformSchema": {
-                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
-                                "rawSchema": ""
-                            }
-                        },
-                        "fields": [
-                            {
-                                "fieldPath": "CPU Usage",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "prometheus_metric",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            },
-                            {
-                                "fieldPath": "Memory Usage",
-                                "nullable": false,
-                                "type": {
-                                    "type": {
-                                        "com.linkedin.pegasus2avro.schema.NumberType": {}
-                                    }
-                                },
-                                "nativeDataType": "prometheus_metric",
-                                "recursive": false,
-                                "isPartOfKey": false
-                            }
-                        ]
-                    }
+                    "fieldPath": "CPU Usage",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "prometheus_metric",
+                    "recursive": false,
+                    "isPartOfKey": false
                 },
                 {
-                    "com.linkedin.pegasus2avro.common.GlobalTags": {
-                        "tags": [
-                            {
-                                "tag": "urn:li:tag:test-tag"
-                            },
-                            {
-                                "tag": "urn:li:tag:integration-test"
-                            }
-                        ]
-                    }
+                    "fieldPath": "Memory Usage",
+                    "nullable": false,
+                    "type": {
+                        "type": {
+                            "com.linkedin.schema.NumberType": {}
+                        }
+                    },
+                    "nativeDataType": "prometheus_metric",
+                    "recursive": false,
+                    "isPartOfKey": false
                 }
             ]
         }
@@ -1591,10 +1657,17 @@
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)",
     "changeType": "UPSERT",
-    "aspectName": "container",
+    "aspectName": "globalTags",
     "aspect": {
         "json": {
-            "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810"
+            "tags": [
+                {
+                    "tag": "urn:li:tag:test-tag"
+                },
+                {
+                    "tag": "urn:li:tag:integration-test"
+                }
+            ]
         }
     },
     "systemMetadata": {
@@ -1607,19 +1680,10 @@
     "entityType": "dataset",
     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)",
     "changeType": "UPSERT",
-    "aspectName": "upstreamLineage",
+    "aspectName": "container",
     "aspect": {
         "json": {
-            "upstreams": [
-                {
-                    "auditStamp": {
-                        "time": 0,
-                        "actor": "urn:li:corpuser:unknown"
-                    },
-                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:prometheus,local.test-prometheus,PROD)",
-                    "type": "TRANSFORMED"
-                }
-            ]
+            "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810"
         }
     },
     "systemMetadata": {
@@ -1698,7 +1762,7 @@
                 "datasourceType": "prometheus",
                 "datasourceUid": "test-prometheus",
                 "description": "Prometheus system metrics",
-                "queryCount": "2"
+                "targetsCount": "2"
             },
             "title": "System Metrics",
             "description": "Prometheus system metrics",
@@ -1838,6 +1902,106 @@
         "lastRunId": "no-run-id-provided"
     }
 },
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "upstreamLineage",
+    "aspect": {
+        "json": {
+            "upstreams": [
+                {
+                    "auditStamp": {
+                        "time": 0,
+                        "actor": "urn:li:corpuser:unknown"
+                    },
+                    "dataset": "urn:li:dataset:(urn:li:dataPlatform:prometheus,local.test-prometheus,PROD)",
+                    "type": "TRANSFORMED"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1720785600000,
+        "runId": "grafana-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
 {
     "entityType": "dashboard",
     "entityUrn": "urn:li:dashboard:(grafana,local-grafana.default)",