From ef05bd994feac6e8ea6a99b83b33abf7e2b42f71 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Thu, 13 Nov 2025 15:36:12 +0000 Subject: [PATCH 01/15] fix(ingestion/grafana): fix fails caused by text panels in ingestion --- .../ingestion/source/grafana/grafana_api.py | 7 +- .../source/grafana/grafana_config.py | 5 + .../source/grafana/grafana_source.py | 1 + .../ingestion/source/grafana/models.py | 156 +++++++++--------- .../tests/unit/grafana/test_grafana_models.py | 111 +++++++++++++ 5 files changed, 205 insertions(+), 75 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_api.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_api.py index 95bd857eca7a48..1c7d50ca43ec75 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_api.py @@ -23,11 +23,13 @@ def __init__( verify_ssl: bool, page_size: int, report: GrafanaSourceReport, + skip_text_panels: bool = False, ) -> None: self.base_url = base_url self.verify_ssl = verify_ssl self.page_size = page_size self.report = report + self.skip_text_panels = skip_text_panels self.session = self._create_session(token) def _create_session(self, token: SecretStr) -> requests.Session: @@ -88,7 +90,10 @@ def get_dashboard(self, uid: str) -> Optional[Dashboard]: try: response = self.session.get(f"{self.base_url}/api/dashboards/uid/{uid}") response.raise_for_status() - return Dashboard.model_validate(response.json()) + dashboard_data = response.json() + if self.skip_text_panels: + dashboard_data["_skip_text_panels"] = True + return Dashboard.model_validate(dashboard_data) except requests.exceptions.RequestException as e: self.report.warning( title="Dashboard Fetch Error", diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py index e84cdab6da9f9a..b897e25daf0a8a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py @@ -80,6 +80,11 @@ class GrafanaSourceConfig( ingest_owners: bool = Field( default=True, description="Whether to ingest dashboard ownership information" ) + skip_text_panels: bool = Field( + default=False, + description="Whether to skip text panels during ingestion. " + "Text panels don't contain data visualizations and may not be relevant for data lineage.", + ) include_lineage: bool = Field( default=True, diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py index dc26e7c4a0f631..99cb796de7233e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py @@ -154,6 +154,7 @@ def __init__(self, config: GrafanaSourceConfig, ctx: PipelineContext): verify_ssl=self.config.verify_ssl, page_size=self.config.page_size, report=self.report, + skip_text_panels=self.config.skip_text_panels, ) # Initialize lineage extractor with graph diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index c035c91037f31b..695b48d17458f2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -11,49 +11,47 @@ import logging from typing import Any, Dict, List, Optional -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator from datahub.emitter.mcp_builder import ContainerKey logger = logging.getLogger(__name__) -# Grafana-specific type definitions for better type safety -GrafanaQueryTarget = Dict[ - str, Any -] # Query targets: refId, expr/query, datasource, hide, etc. -GrafanaFieldConfig = Dict[ - str, Any -] # Field config: defaults, overrides, display settings -GrafanaTransformation = Dict[str, Any] # Transformations: id, options + +GrafanaQueryTarget = Dict[str, Any] +GrafanaFieldConfig = Dict[str, Any] +GrafanaTransformation = Dict[str, Any] class _GrafanaBaseModel(BaseModel): - model_config = ConfigDict(coerce_numbers_to_str=True) + model_config = ConfigDict( + coerce_numbers_to_str=True, + populate_by_name=True, + extra="ignore", + ) class DatasourceRef(_GrafanaBaseModel): """Reference to a Grafana datasource.""" - type: Optional[str] = None # Datasource type (prometheus, mysql, postgres, etc.) - uid: Optional[str] = None # Datasource unique identifier - name: Optional[str] = None # Datasource display name + type: Optional[str] = None + uid: Optional[str] = None + name: Optional[str] = None class Panel(_GrafanaBaseModel): """Represents a Grafana dashboard panel.""" id: str - title: str + title: Optional[str] = ( + None # Optional: text panels in Grafana v11+ don't have titles + ) description: str = "" type: Optional[str] = None - # Query targets - each contains refId (A,B,C...), query/expr, datasource ref, etc. query_targets: List[GrafanaQueryTarget] = Field( default_factory=list, alias="targets" ) - # Datasource reference - contains type, uid, name datasource_ref: Optional[DatasourceRef] = Field(default=None, alias="datasource") - # Field configuration - display settings, defaults, overrides field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig") - # Data transformations - each contains id and transformation-specific options transformations: List[GrafanaTransformation] = Field(default_factory=list) @@ -64,74 +62,84 @@ class Dashboard(_GrafanaBaseModel): title: str description: str = "" version: Optional[str] = None - panels: List[Panel] - tags: List[str] + panels: List[Panel] = Field(default_factory=list) + tags: List[str] = Field(default_factory=list) timezone: Optional[str] = None refresh: Optional[str] = None schema_version: Optional[str] = Field(default=None, alias="schemaVersion") - folder_id: Optional[str] = Field(default=None, alias="meta.folderId") + folder_id: Optional[str] = None created_by: Optional[str] = None @staticmethod - def extract_panels(panels_data: List[Dict[str, Any]]) -> List[Panel]: - """Extract panels, including nested ones.""" + def extract_panels( + panels_data: List[Dict[str, Any]], skip_text_panels: bool = False + ) -> List[Panel]: + """Extract panels, including nested ones, skipping invalid panels.""" panels: List[Panel] = [] for panel_data in panels_data: if panel_data.get("type") == "row" and "panels" in panel_data: - panels.extend( - Panel.model_validate(p) - for p in panel_data["panels"] - if p.get("type") != "row" - ) + for p in panel_data["panels"]: + if p.get("type") != "row": + if skip_text_panels and p.get("type") == "text": + continue + try: + panels.append(Panel.model_validate(p)) + except Exception as e: + logger.warning( + f"Error parsing panel (id={p.get('id')}, type={p.get('type')}): {e}. Skipping this panel." + ) elif panel_data.get("type") != "row": - panels.append(Panel.model_validate(panel_data)) + if skip_text_panels and panel_data.get("type") == "text": + continue + try: + panels.append(Panel.model_validate(panel_data)) + except Exception as e: + logger.warning( + f"Error parsing panel (id={panel_data.get('id')}, type={panel_data.get('type')}): {e}. Skipping this panel." + ) return panels + @field_validator("refresh", mode="before") + @classmethod + def convert_refresh_to_string(cls, v: Any) -> Optional[str]: + """Convert boolean refresh values to strings for compatibility.""" + if isinstance(v, bool): + return str(v) + return v + + @model_validator(mode="before") @classmethod - def model_validate( - cls, - obj: Any, - *, - strict: Optional[bool] = None, - from_attributes: Optional[bool] = None, - context: Optional[Any] = None, - by_alias: Optional[bool] = None, - by_name: Optional[bool] = None, - ) -> "Dashboard": - """Custom parsing to handle nested panel extraction.""" - # Handle both direct dashboard data and nested structure with 'dashboard' key - dashboard_data = obj.get("dashboard", obj) - - _panel_data = dashboard_data.get("panels", []) - panels = [] - try: - panels = cls.extract_panels(_panel_data) - except Exception as e: - logger.warning( - f"Error extracting panels from dashboard for dashboard panels {_panel_data} : {e}" - ) - - # Extract meta.folderId from nested structure - meta = dashboard_data.get("meta", {}) - folder_id = meta.get("folderId") - - # Create dashboard data without meta to avoid conflicts - dashboard_dict = {**dashboard_data, "panels": panels, "folder_id": folder_id} - if "meta" in dashboard_dict: - del dashboard_dict["meta"] - - # Handle refresh field type mismatch - convert boolean to string - if "refresh" in dashboard_dict and isinstance(dashboard_dict["refresh"], bool): - dashboard_dict["refresh"] = str(dashboard_dict["refresh"]) - - return super().model_validate( - dashboard_dict, - strict=strict, - from_attributes=from_attributes, - context=context, - by_alias=by_alias, - by_name=by_name, - ) + def extract_dashboard_data(cls, data: Any) -> Dict[str, Any]: + """Extract dashboard data from nested structure and process panels.""" + if isinstance(data, dict): + dashboard_data = data.get("dashboard", data) + + _panel_data = dashboard_data.get("panels", []) + panels = [] + + skip_text_panels = dashboard_data.get("_skip_text_panels", False) + + if _panel_data and all(isinstance(p, dict) for p in _panel_data): + try: + panels = cls.extract_panels(_panel_data, skip_text_panels) + except Exception as e: + logger.warning(f"Error extracting panels from dashboard: {e}") + else: + panels = _panel_data + + meta = dashboard_data.get("meta", {}) + folder_id = meta.get("folderId") if meta else None + + result = {**dashboard_data, "panels": panels} + if folder_id is not None: + result["folder_id"] = folder_id + + result.pop("meta", None) + result.pop("_skip_text_panels", None) + + return result + + return data class Folder(_GrafanaBaseModel): @@ -152,4 +160,4 @@ class DashboardContainerKey(ContainerKey): """Key for identifying a Grafana dashboard.""" dashboard_id: str - folder_id: Optional[str] = None # Reference to parent folder + folder_id: Optional[str] = None diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py index 20f69d75136d33..2ae29f763097c6 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py @@ -88,6 +88,117 @@ def test_dashboard_nested_panels(): assert dashboard.panels[1].title == "Top Level Panel" +def test_panel_without_title(): + """Test that text panels without titles are parsed successfully.""" + panel_data: Dict[str, Any] = { + "id": "1", + "type": "text", + "datasource": {"type": "-- Grafana --", "uid": "-- Grafana --"}, + } + + panel = Panel.model_validate(panel_data) + assert panel.id == "1" + assert panel.title is None + assert panel.type == "text" + + +def test_dashboard_with_text_panel(): + """Test that dashboards with text panels (no title) are parsed successfully.""" + dashboard_data = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "description": "", + "version": "1", + "panels": [ + {"id": "1", "title": "Regular Panel", "type": "graph"}, + { + "id": "2", + "type": "text", + "datasource": {"type": "-- Grafana --", "uid": "-- Grafana --"}, + }, + ], + "tags": [], + } + } + + dashboard = Dashboard.model_validate(dashboard_data) + assert len(dashboard.panels) == 2 + assert dashboard.panels[0].title == "Regular Panel" + assert dashboard.panels[1].title is None + assert dashboard.panels[1].type == "text" + + +def test_dashboard_with_invalid_panel_skips(): + """Test that invalid panels are skipped with a warning instead of failing.""" + dashboard_data = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "description": "", + "version": "1", + "panels": [ + {"id": "1", "title": "Valid Panel", "type": "graph"}, + # Missing required 'id' field - should be skipped + {"title": "Invalid Panel", "type": "graph"}, + {"id": "3", "title": "Another Valid Panel", "type": "table"}, + ], + "tags": [], + } + } + + dashboard = Dashboard.model_validate(dashboard_data) + # Should have 2 panels (invalid one is skipped) + assert len(dashboard.panels) == 2 + assert dashboard.panels[0].title == "Valid Panel" + assert dashboard.panels[1].title == "Another Valid Panel" + + +def test_dashboard_skip_text_panels(): + """Test that text panels can be skipped when skip_text_panels is True.""" + dashboard_data_with_text = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "description": "", + "version": "1", + "panels": [ + {"id": "1", "title": "Regular Panel", "type": "graph"}, + {"id": "2", "type": "text"}, + {"id": "3", "title": "Another Panel", "type": "table"}, + ], + "tags": [], + } + } + + # With skip_text_panels=False (default), all panels should be included + dashboard_with_text = Dashboard.model_validate(dashboard_data_with_text) + assert len(dashboard_with_text.panels) == 3 + + # With skip_text_panels=True, text panels should be skipped + dashboard_data_skip_text = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "description": "", + "version": "1", + "panels": [ + {"id": "1", "title": "Regular Panel", "type": "graph"}, + {"id": "2", "type": "text"}, + {"id": "3", "title": "Another Panel", "type": "table"}, + ], + "tags": [], + "_skip_text_panels": True, + } + } + dashboard_no_text = Dashboard.model_validate(dashboard_data_skip_text) + assert len(dashboard_no_text.panels) == 2 + assert dashboard_no_text.panels[0].title == "Regular Panel" + assert dashboard_no_text.panels[0].type == "graph" + assert dashboard_no_text.panels[1].title == "Another Panel" + assert dashboard_no_text.panels[1].type == "table" + + def test_folder(): folder_data = {"id": "1", "title": "Test Folder", "description": "Test Description"} From 4d4845135b0f951e5949f3e9745d0cea50d9102f Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 10:10:45 +0000 Subject: [PATCH 02/15] CUS-6824 --- .../ingestion/source/grafana/models.py | 39 ++- .../tests/integration/grafana/test_grafana.py | 14 +- .../tests/unit/grafana/test_grafana_models.py | 243 ++++++++++++++++++ .../unit/grafana/test_grafana_validation.py | 213 +++++++++++++++ 4 files changed, 494 insertions(+), 15 deletions(-) create mode 100644 metadata-ingestion/tests/unit/grafana/test_grafana_validation.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index db11ff527948b1..15e6e9f3d49a23 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -54,6 +54,23 @@ class Panel(_GrafanaBaseModel): field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig") transformations: List[GrafanaTransformation] = Field(default_factory=list) + @model_validator(mode="before") + @classmethod + def ensure_panel_defaults(cls, data: Any) -> Dict[str, Any]: + """Set defaults for optional fields and normalize data types.""" + if isinstance(data, dict): + result = dict(data) + result.setdefault("description", "") + result.setdefault("targets", []) + result.setdefault("transformations", []) + result.setdefault("fieldConfig", {}) + + if "id" in result and isinstance(result["id"], int): + result["id"] = str(result["id"]) + + return result + return data + class Dashboard(_GrafanaBaseModel): """Represents a Grafana dashboard.""" @@ -85,8 +102,13 @@ def extract_panels( try: panels.append(Panel.model_validate(p)) except Exception as e: + logger.debug( + f"Error parsing nested panel (id={p.get('id')}, type={p.get('type')}): {e}. " + f"Panel data: {p}. Skipping this panel." + ) logger.warning( - f"Error parsing panel (id={p.get('id')}, type={p.get('type')}): {e}. Skipping this panel." + f"Skipping panel with validation errors (id={p.get('id')}, type={p.get('type')}): " + f"Missing or invalid fields. Enable debug logging for details." ) elif panel_data.get("type") != "row": if skip_text_panels and panel_data.get("type") == "text": @@ -94,8 +116,13 @@ def extract_panels( try: panels.append(Panel.model_validate(panel_data)) except Exception as e: + logger.debug( + f"Error parsing panel (id={panel_data.get('id')}, type={panel_data.get('type')}): {e}. " + f"Panel data: {panel_data}. Skipping this panel." + ) logger.warning( - f"Error parsing panel (id={panel_data.get('id')}, type={panel_data.get('type')}): {e}. Skipping this panel." + f"Skipping panel with validation errors (id={panel_data.get('id')}, type={panel_data.get('type')}): " + f"Missing or invalid fields. Enable debug logging for details." ) return panels @@ -134,6 +161,14 @@ def extract_dashboard_data(cls, data: Any) -> Dict[str, Any]: if folder_id is not None: result["folder_id"] = folder_id + # Set defaults for optional fields that may be missing from API responses + result.setdefault("tags", []) + result.setdefault("description", "") + result.setdefault("version", None) + result.setdefault("timezone", None) + result.setdefault("refresh", None) + result.setdefault("created_by", None) + result.pop("meta", None) result.pop("_skip_text_panels", None) diff --git a/metadata-ingestion/tests/integration/grafana/test_grafana.py b/metadata-ingestion/tests/integration/grafana/test_grafana.py index 8b6f8fe9a2bdc5..9607699fb65ad9 100644 --- a/metadata-ingestion/tests/integration/grafana/test_grafana.py +++ b/metadata-ingestion/tests/integration/grafana/test_grafana.py @@ -19,9 +19,6 @@ FROZEN_TIME = "2024-07-12 12:00:00" -# Expected dashboards that should be provisioned during the test setup -# If new dashboards are added to the test setup, they should be listed here -# to ensure provisioning is complete before running ingestion tests EXPECTED_DASHBOARDS = {"Test Integration Dashboard"} logger = logging.getLogger(__name__) @@ -142,10 +139,8 @@ def loaded_grafana(docker_compose_runner, test_resources_dir): with docker_compose_runner( test_resources_dir / "docker-compose.yml", "grafana" ) as docker_services: - # Wait for all services to be ready wait_for_port(docker_services, "postgres", 5432, timeout=90) - # Prometheus container doesn't have bash, so use a simple HTTP check def check_prometheus_ready(): try: prometheus_port = docker_services.port_for("prometheus", 9090) @@ -179,12 +174,9 @@ def check_prometheus_ready(): def verify_grafana_api_ready(docker_services: pytest_docker.plugin.Services) -> None: """Robust verification that Grafana API is fully accessible after health check passes""" - # Configure requests session with retries session = build_retry_session( total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504, 429] ) - - # Wait for API endpoints to be fully ready (health check might pass but API still initializing) for attempt in tenacity.Retrying( stop=tenacity.stop_after_attempt(60), wait=tenacity.wait_fixed(3), reraise=True ): @@ -238,11 +230,7 @@ def verify_grafana_fully_ready( def verify_grafana_entities_provisioned(timeout: int = 180) -> None: - """Wait for Grafana entities to be provisioned before running ingestion tests - - This function can be extended in the future to validate other entity types - like data sources, folders, etc. - """ + """Wait for Grafana entities to be provisioned before running ingestion tests""" session = build_retry_session( total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504, 429] ) diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py index 2ae29f763097c6..7e30eaaaa3e859 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py @@ -206,3 +206,246 @@ def test_folder(): assert folder.id == "1" assert folder.title == "Test Folder" assert folder.description == "Test Description" + + +# Tests for validation fixes - handling missing fields gracefully + + +def test_dashboard_missing_tags_field(): + """Test that dashboard without tags field gets default empty list.""" + dashboard_data = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "panels": [], + # Missing 'tags' field - should default to [] + } + } + + dashboard = Dashboard.model_validate(dashboard_data) + assert dashboard.uid == "dash1" + assert dashboard.title == "Test Dashboard" + assert dashboard.tags == [] # Should default to empty list + assert dashboard.description == "" # Should default to empty string + + +def test_dashboard_missing_optional_fields(): + """Test that dashboard with minimal fields gets appropriate defaults.""" + dashboard_data = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "panels": [], + # Missing: tags, description, version, timezone, refresh, created_by + } + } + + dashboard = Dashboard.model_validate(dashboard_data) + assert dashboard.uid == "dash1" + assert dashboard.title == "Test Dashboard" + assert dashboard.tags == [] + assert dashboard.description == "" + assert dashboard.version is None + assert dashboard.timezone is None + assert dashboard.refresh is None + assert dashboard.created_by is None + + +def test_panel_missing_optional_fields(): + """Test that panel with minimal fields gets appropriate defaults.""" + panel_data = { + "id": 123, # Integer ID - should be converted to string + "title": "Test Panel", + # Missing: description, targets, transformations, fieldConfig + } + + panel = Panel.model_validate(panel_data) + assert panel.id == "123" # Should be converted to string + assert panel.title == "Test Panel" + assert panel.description == "" # Should default to empty string + assert panel.query_targets == [] # Should default to empty list + assert panel.transformations == [] # Should default to empty list + assert panel.field_config == {} # Should default to empty dict + + +def test_panel_integer_id_conversion(): + """Test that panel IDs are converted from int to string.""" + panel_data = { + "id": 456, # Integer ID + "title": "Test Panel", + "type": "graph", + } + + panel = Panel.model_validate(panel_data) + assert panel.id == "456" + assert isinstance(panel.id, str) + + +def test_dashboard_with_panels_missing_fields(): + """Test dashboard with panels that have missing fields.""" + dashboard_data = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "panels": [ + { + "id": 1, # Integer ID + "title": "Panel 1", + # Missing description, targets, etc. + }, + { + "id": "2", # String ID + # Missing title (should be None for text panels) + "type": "text", + }, + ], + } + } + + dashboard = Dashboard.model_validate(dashboard_data) + assert len(dashboard.panels) == 2 + + # First panel + panel1 = dashboard.panels[0] + assert panel1.id == "1" # Converted to string + assert panel1.title == "Panel 1" + assert panel1.description == "" + assert panel1.query_targets == [] + + # Second panel + panel2 = dashboard.panels[1] + assert panel2.id == "2" + assert panel2.title is None # No title provided + assert panel2.type == "text" + assert panel2.description == "" + + +def test_dashboard_from_grafana_api_response(): + """Test parsing a realistic Grafana API response with missing fields.""" + # Simulates a real Grafana API response that might be missing some fields + api_response = { + "dashboard": { + "uid": "abc123", + "title": "Production Dashboard", + "id": 450, + "panels": [ + { + "id": 1, + "title": "CPU Usage", + "type": "graph", + "targets": [{"expr": "cpu_usage", "refId": "A"}], + }, + { + "id": 2, + "type": "text", + # No title field for text panel + "datasource": {"type": "-- Grafana --", "uid": "-- Grafana --"}, + }, + ], + "time": {"from": "now-1h", "to": "now"}, + "refresh": "5s", + "schemaVersion": 30, + "version": 1, + "meta": { + "type": "db", + "canSave": True, + "canEdit": True, + "canAdmin": True, + "canStar": True, + "slug": "production-dashboard", + "url": "/d/abc123/production-dashboard", + "expires": "0001-01-01T00:00:00Z", + "created": "2024-01-01T10:00:00Z", + "updated": "2024-01-02T15:30:00Z", + "updatedBy": "admin", + "createdBy": "admin", + "version": 1, + "hasAcl": False, + "isFolder": False, + "folderId": 0, + "folderUid": "", + "folderTitle": "General", + "folderUrl": "", + "provisioned": False, + "provisionedExternalId": "", + }, + # Notice: missing 'tags' field entirely + } + } + + dashboard = Dashboard.model_validate(api_response) + assert dashboard.uid == "abc123" + assert dashboard.title == "Production Dashboard" + assert dashboard.tags == [] # Should default to empty list + assert dashboard.version == "1" # Should be converted to string + assert dashboard.refresh == "5s" + assert dashboard.folder_id == "0" # Extracted from meta.folderId + assert len(dashboard.panels) == 2 + + # First panel should have defaults applied + panel1 = dashboard.panels[0] + assert panel1.id == "1" + assert panel1.title == "CPU Usage" + assert panel1.description == "" + assert len(panel1.query_targets) == 1 + + # Second panel (text) should handle missing title + panel2 = dashboard.panels[1] + assert panel2.id == "2" + assert panel2.title is None + assert panel2.type == "text" + + +def test_dashboard_refresh_boolean_conversion(): + """Test that boolean refresh values are converted to strings.""" + dashboard_data = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "panels": [], + "refresh": False, # Boolean value + } + } + + dashboard = Dashboard.model_validate(dashboard_data) + assert dashboard.refresh == "False" # Should be converted to string + + +def test_nested_panels_with_missing_fields(): + """Test nested panels (in row panels) with missing fields.""" + dashboard_data = { + "dashboard": { + "uid": "dash1", + "title": "Test Dashboard", + "panels": [ + { + "type": "row", + "panels": [ + { + "id": 1, + "title": "Nested Panel 1", + # Missing description, targets, etc. + }, + { + "id": 2, + # Missing title and other fields + "type": "text", + }, + ], + } + ], + } + } + + dashboard = Dashboard.model_validate(dashboard_data) + assert len(dashboard.panels) == 2 # Both nested panels should be extracted + + panel1 = dashboard.panels[0] + assert panel1.id == "1" + assert panel1.title == "Nested Panel 1" + assert panel1.description == "" + + panel2 = dashboard.panels[1] + assert panel2.id == "2" + assert panel2.title is None + assert panel2.type == "text" diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py new file mode 100644 index 00000000000000..9a5efa8c087a6c --- /dev/null +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py @@ -0,0 +1,213 @@ +from typing import Any, Dict + +from datahub.ingestion.source.grafana.models import Dashboard, Panel + + +def test_dashboard_missing_tags_validation(): + """Test dashboard validation when tags field is missing.""" + # Dashboard data without tags field + dashboard_data = { + "id": 450, + "panels": [], + "title": "Some Dashboard", + "uid": "dashboard-uid", + "version": 1, + "folder_id": None, + } + + dashboard = Dashboard.model_validate(dashboard_data) + assert dashboard.uid == "dashboard-uid" + assert dashboard.title == "Some Dashboard" + assert dashboard.tags == [] + assert dashboard.version == "1" + assert dashboard.folder_id is None + + +def test_panel_validation_with_missing_fields(): + """Test panel validation when optional fields are missing.""" + panel_data_list: list[Dict[str, Any]] = [ + { + "id": 1, + "type": "graph", + }, + { + "id": 2, + }, + { + "id": 3, + "title": "Chart with missing datasource", + "type": "graph", + }, + ] + + for panel_data in panel_data_list: + panel = Panel.model_validate(panel_data) + assert panel.id == str(panel_data["id"]) + assert panel.description == "" + assert panel.query_targets == [] + assert panel.transformations == [] + assert panel.field_config == {} + + +def test_dashboard_completely_minimal(): + """Test dashboard with only absolutely required fields.""" + minimal_dashboard = { + "uid": "minimal-dash", + "title": "Minimal Dashboard", + } + + dashboard = Dashboard.model_validate(minimal_dashboard) + assert dashboard.uid == "minimal-dash" + assert dashboard.title == "Minimal Dashboard" + assert dashboard.tags == [] + assert dashboard.description == "" + assert dashboard.panels == [] + assert dashboard.version is None + assert dashboard.timezone is None + assert dashboard.refresh is None + assert dashboard.created_by is None + assert dashboard.folder_id is None + + +def test_panel_completely_minimal(): + """Test panel with only absolutely required fields.""" + minimal_panel = {"id": 999} + + panel = Panel.model_validate(minimal_panel) + assert panel.id == "999" + assert panel.title is None + assert panel.description == "" + assert panel.type is None + assert panel.query_targets == [] + assert panel.datasource_ref is None + assert panel.field_config == {} + assert panel.transformations == [] + + +def test_dashboard_with_incomplete_panels(): + """Test dashboard validation containing panels with missing optional fields.""" + dashboard_data = { + "uid": "incomplete-dash", + "title": "Dashboard with Incomplete Panels", + "panels": [ + { + "id": 1, + "type": "graph", + }, + { + "id": 2, + }, + { + "id": 3, + "title": "Panel with Some Fields", + "type": "table", + }, + ], + } + + dashboard = Dashboard.model_validate(dashboard_data) + assert dashboard.uid == "incomplete-dash" + assert dashboard.title == "Dashboard with Incomplete Panels" + assert dashboard.tags == [] + assert dashboard.description == "" + assert len(dashboard.panels) == 3 + panel1 = dashboard.panels[0] + assert panel1.id == "1" + assert panel1.type == "graph" + assert panel1.title is None + assert panel1.description == "" + assert panel1.query_targets == [] + + panel2 = dashboard.panels[1] + assert panel2.id == "2" + assert panel2.title is None + assert panel2.type is None # Allowed to be None + assert panel2.description == "" + assert panel2.query_targets == [] + + panel3 = dashboard.panels[2] + assert panel3.id == "3" + assert panel3.title == "Panel with Some Fields" + assert panel3.type == "table" + assert panel3.description == "" + assert panel3.query_targets == [] + + +def test_realistic_grafana_api_response(): + """Test validation with a realistic Grafana API response format.""" + # Simulates a typical Grafana API response with some optional fields missing + real_response = { + "dashboard": { + "id": 450, + "uid": "real-dashboard", + "title": "Production Metrics", + "url": "/d/real-dashboard/production-metrics", + "slug": "production-metrics", + "type": "db", + "panels": [ + { + "id": 1, + "title": "CPU Usage", + "type": "graph", + "targets": [{"expr": "cpu_usage_percent", "refId": "A"}], + "datasource": {"type": "prometheus", "uid": "prometheus-uid"}, + }, + { + "id": 2, + "type": "text", + # Text panel with no title - common in Grafana + "datasource": {"type": "-- Grafana --", "uid": "-- Grafana --"}, + }, + ], + "time": {"from": "now-1h", "to": "now"}, + "refresh": "30s", + "schemaVersion": 30, + "version": 5, + "meta": { + "type": "db", + "canSave": True, + "canEdit": True, + "slug": "production-metrics", + "url": "/d/real-dashboard/production-metrics", + "expires": "0001-01-01T00:00:00Z", + "created": "2024-01-01T10:00:00Z", + "updated": "2024-01-15T14:30:00Z", + "updatedBy": "admin", + "createdBy": "admin", + "version": 5, + "hasAcl": False, + "isFolder": False, + "folderId": 1, + "folderUid": "general", + "folderTitle": "General", + "folderUrl": "", + "provisioned": False, + "provisionedExternalId": "", + }, + # Note: 'tags' and 'description' fields are not included + } + } + + dashboard = Dashboard.model_validate(real_response) + assert dashboard.uid == "real-dashboard" + assert dashboard.title == "Production Metrics" + assert dashboard.tags == [] # Should default to empty list + assert dashboard.description == "" # Should default to empty string + assert dashboard.version == "5" # Should be converted to string + assert dashboard.refresh == "30s" + assert dashboard.folder_id == "1" # Extracted from meta + assert len(dashboard.panels) == 2 + + # Verify panels are processed correctly + cpu_panel = dashboard.panels[0] + assert cpu_panel.id == "1" + assert cpu_panel.title == "CPU Usage" + assert cpu_panel.type == "graph" + assert cpu_panel.description == "" # Default + assert len(cpu_panel.query_targets) == 1 + + text_panel = dashboard.panels[1] + assert text_panel.id == "2" + assert text_panel.title is None # Text panels often have no title + assert text_panel.type == "text" + assert text_panel.description == "" # Default From 2c0b1e45a5821d534bee8f99f81caf37e19e61c0 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 18:03:08 +0000 Subject: [PATCH 03/15] fixing issues --- .../ingestion/source/grafana/field_utils.py | 19 +++-- .../ingestion/source/grafana/models.py | 74 ++++++++++++---- .../unit/grafana/test_grafana_field_utils.py | 12 +++ .../unit/grafana/test_grafana_validation.py | 85 +++++++++++++++++++ 4 files changed, 167 insertions(+), 23 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py index 2164aeca4d7889..026a6a4a0d8bb8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py @@ -77,8 +77,8 @@ def extract_raw_sql_fields( schema_aware = False if panel and panel.datasource_ref and connection_to_platform_map: - ds_type = panel.datasource_ref.type or "unknown" - ds_uid = panel.datasource_ref.uid or "unknown" + ds_type = getattr(panel.datasource_ref, "type", None) or "unknown" + ds_uid = getattr(panel.datasource_ref, "uid", None) or "unknown" # Try to find mapping by datasource UID first, then by type platform_config = connection_to_platform_map.get( @@ -227,13 +227,16 @@ def extract_fields_from_panel( def extract_fields_from_targets( - targets: List[Dict[str, Any]], + targets: Optional[List[Dict[str, Any]]], panel: Optional[Panel] = None, connection_to_platform_map: Optional[Dict[str, Any]] = None, graph: Optional[DataHubGraph] = None, report: Optional[Any] = None, ) -> List[SchemaFieldClass]: """Extract fields from panel targets.""" + if targets is None: + return [] + fields = [] for target in targets: fields.extend(extract_sql_column_fields(target)) @@ -261,9 +264,12 @@ def extract_time_format_fields(target: Dict[str, Any]) -> List[SchemaFieldClass] def get_fields_from_field_config( - field_config: Dict[str, Any], + field_config: Optional[Dict[str, Any]], ) -> List[SchemaFieldClass]: """Extract fields from field configuration.""" + if field_config is None: + return [] + fields = [] defaults = field_config.get("defaults", {}) unit = defaults.get("unit") @@ -290,9 +296,12 @@ def get_fields_from_field_config( def get_fields_from_transformations( - transformations: List[Dict[str, Any]], + transformations: Optional[List[Dict[str, Any]]], ) -> List[SchemaFieldClass]: """Extract fields from transformations.""" + if transformations is None: + return [] + fields = [] for transform in transformations: if transform.get("type") == "organize": diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index 15e6e9f3d49a23..40242e6067e51f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -54,19 +54,49 @@ class Panel(_GrafanaBaseModel): field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig") transformations: List[GrafanaTransformation] = Field(default_factory=list) + @staticmethod + def _ensure_dict_field( + data: Dict[str, Any], field_name: str, default: Dict[str, Any] + ) -> None: + """Ensure a field is a dict, converting None to the default dict.""" + if data.get(field_name) is None: + data[field_name] = default + else: + data.setdefault(field_name, default) + + @staticmethod + def _ensure_list_field( + data: Dict[str, Any], field_name: str, default: List[Any] + ) -> None: + """Ensure a field is a list, converting None to the default list.""" + if data.get(field_name) is None: + data[field_name] = default + else: + data.setdefault(field_name, default) + + @staticmethod + def _normalize_id_field(data: Dict[str, Any]) -> None: + """Convert integer ID to string for consistency.""" + if "id" in data and isinstance(data["id"], int): + data["id"] = str(data["id"]) + @model_validator(mode="before") @classmethod def ensure_panel_defaults(cls, data: Any) -> Dict[str, Any]: """Set defaults for optional fields and normalize data types.""" if isinstance(data, dict): result = dict(data) + + # Set basic defaults result.setdefault("description", "") - result.setdefault("targets", []) - result.setdefault("transformations", []) - result.setdefault("fieldConfig", {}) - if "id" in result and isinstance(result["id"], int): - result["id"] = str(result["id"]) + # Ensure complex fields are never None + cls._ensure_list_field(result, "targets", []) + cls._ensure_list_field(result, "transformations", []) + cls._ensure_dict_field(result, "fieldConfig", {}) + + # Normalize data types + cls._normalize_id_field(result) return result return data @@ -94,9 +124,9 @@ def extract_panels( """Extract panels, including nested ones, skipping invalid panels.""" panels: List[Panel] = [] for panel_data in panels_data: - if panel_data.get("type") == "row" and "panels" in panel_data: - for p in panel_data["panels"]: - if p.get("type") != "row": + if panel_data.get("type") == "row" and panel_data.get("panels"): + for p in panel_data.get("panels", []): + if p and p.get("type") != "row": if skip_text_panels and p.get("type") == "text": continue try: @@ -126,6 +156,22 @@ def extract_panels( ) return panels + @staticmethod + def _set_dashboard_defaults(result: Dict[str, Any]) -> None: + """Set default values for optional dashboard fields.""" + result.setdefault("tags", []) + result.setdefault("description", "") + result.setdefault("version", None) + result.setdefault("timezone", None) + result.setdefault("refresh", None) + result.setdefault("created_by", None) + + @staticmethod + def _cleanup_dashboard_metadata(result: Dict[str, Any]) -> None: + """Remove internal metadata fields from dashboard data.""" + result.pop("meta", None) + result.pop("_skip_text_panels", None) + @field_validator("refresh", mode="before") @classmethod def convert_refresh_to_string(cls, v: Any) -> Optional[str]: @@ -161,16 +207,8 @@ def extract_dashboard_data(cls, data: Any) -> Dict[str, Any]: if folder_id is not None: result["folder_id"] = folder_id - # Set defaults for optional fields that may be missing from API responses - result.setdefault("tags", []) - result.setdefault("description", "") - result.setdefault("version", None) - result.setdefault("timezone", None) - result.setdefault("refresh", None) - result.setdefault("created_by", None) - - result.pop("meta", None) - result.pop("_skip_text_panels", None) + cls._set_dashboard_defaults(result) + cls._cleanup_dashboard_metadata(result) return result diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py index 60dcee21eb77a6..eacab847060c39 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py @@ -116,3 +116,15 @@ def test_get_fields_from_transformations(): assert len(fields) == 2 field_paths = {f.fieldPath for f in fields} assert field_paths == {"user", "value"} + + +def test_get_fields_from_field_config_none(): + """Test that get_fields_from_field_config handles None input gracefully.""" + fields = get_fields_from_field_config(None) + assert fields == [] + + +def test_get_fields_from_field_config_empty(): + """Test that get_fields_from_field_config handles empty dict input.""" + fields = get_fields_from_field_config({}) + assert fields == [] diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py index 9a5efa8c087a6c..37f689169beb00 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py @@ -84,6 +84,64 @@ def test_panel_completely_minimal(): assert panel.transformations == [] +def test_panel_with_null_field_config(): + """Test panel validation when fieldConfig is explicitly null.""" + panel_data = { + "id": 123, + "fieldConfig": None, + } + + panel = Panel.model_validate(panel_data) + assert panel.id == "123" + assert panel.field_config == {} # Should be converted to empty dict + + +def test_panel_with_null_transformations(): + """Test panel validation when transformations is explicitly null.""" + panel_data = { + "id": 456, + "transformations": None, + } + + panel = Panel.model_validate(panel_data) + assert panel.id == "456" + assert panel.transformations == [] # Should be converted to empty list + + +def test_panel_with_null_targets(): + """Test panel validation when targets is explicitly null.""" + panel_data = { + "id": 789, + "targets": None, + } + + panel = Panel.model_validate(panel_data) + assert panel.id == "789" + assert panel.query_targets == [] # Should be converted to empty list + + +def test_panel_with_all_null_optional_fields(): + """Test panel validation when all optional fields are null.""" + panel_data = { + "id": 999, + "fieldConfig": None, + "transformations": None, + "targets": None, + "datasource": None, + "title": None, + "type": None, + } + + panel = Panel.model_validate(panel_data) + assert panel.id == "999" + assert panel.field_config == {} + assert panel.transformations == [] + assert panel.query_targets == [] + assert panel.datasource_ref is None + assert panel.title is None + assert panel.type is None + + def test_dashboard_with_incomplete_panels(): """Test dashboard validation containing panels with missing optional fields.""" dashboard_data = { @@ -133,6 +191,33 @@ def test_dashboard_with_incomplete_panels(): assert panel3.query_targets == [] +def test_text_panel_like_real_grafana(): + """Test a text panel structure like those found in real Grafana dashboards.""" + # This mimics the structure from our integration test dashboard + text_panel_data = { + "id": 1, + "type": "text", + "title": "Dashboard Information", + "gridPos": {"x": 0, "y": 0, "w": 24, "h": 3}, + "options": { + "content": "# Test Integration Dashboard\nThis dashboard contains test panels.", + "mode": "markdown", + }, + # Note: No fieldConfig, targets, transformations, datasource, or description + } + + # Should validate successfully with defaults applied + panel = Panel.model_validate(text_panel_data) + assert panel.id == "1" + assert panel.type == "text" + assert panel.title == "Dashboard Information" + assert panel.description == "" # Default applied + assert panel.field_config == {} # Default applied + assert panel.query_targets == [] # Default applied + assert panel.transformations == [] # Default applied + assert panel.datasource_ref is None + + def test_realistic_grafana_api_response(): """Test validation with a realistic Grafana API response format.""" # Simulates a typical Grafana API response with some optional fields missing From 5afa92311e9ed7d31432f402a60a308f712d4e0a Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 18:46:29 +0000 Subject: [PATCH 04/15] defensive against nones and nulls --- .../ingestion/source/grafana/field_utils.py | 67 +++++++++++-------- .../ingestion/source/grafana/models.py | 28 ++++++-- .../unit/grafana/test_grafana_field_utils.py | 31 +++++++-- 3 files changed, 88 insertions(+), 38 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py index 026a6a4a0d8bb8..ba3a8f5c1f3b93 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py @@ -185,13 +185,15 @@ def _extract_raw_sql_fields_fallback(target: Dict[str, Any]) -> List[SchemaField # Clean up any remaining quotes or parentheses field_name = field_name.strip("\"'()") - fields.append( - SchemaFieldClass( - fieldPath=field_name, - type=SchemaFieldDataTypeClass(type=StringTypeClass()), - nativeDataType="sql_column", + # Only create field if field_name is not empty + if field_name: + fields.append( + SchemaFieldClass( + fieldPath=field_name, + type=SchemaFieldDataTypeClass(type=StringTypeClass()), + nativeDataType="sql_column", + ) ) - ) return fields @@ -208,13 +210,28 @@ def extract_fields_from_panel( ) -> List[SchemaFieldClass]: """Extract all fields from a panel.""" fields = [] - fields.extend( - extract_fields_from_targets( - panel.query_targets, panel, connection_to_platform_map, graph, report + + # Extract fields from targets (only if there are targets) + if panel.safe_query_targets: + target_fields = extract_fields_from_targets( + panel.safe_query_targets, panel, connection_to_platform_map, graph, report + ) + if target_fields: + fields.extend(target_fields) + + # Extract fields from field config (only if there's meaningful config) + if panel.safe_field_config: + field_config_fields = get_fields_from_field_config(panel.safe_field_config) + if field_config_fields: + fields.extend(field_config_fields) + + # Extract fields from transformations (only if there are transformations) + if panel.safe_transformations: + transformation_fields = get_fields_from_transformations( + panel.safe_transformations ) - ) - fields.extend(get_fields_from_field_config(panel.field_config)) - fields.extend(get_fields_from_transformations(panel.transformations)) + if transformation_fields: + fields.extend(transformation_fields) # Track schema field extraction if report: @@ -227,15 +244,13 @@ def extract_fields_from_panel( def extract_fields_from_targets( - targets: Optional[List[Dict[str, Any]]], + targets: List[Dict[str, Any]], panel: Optional[Panel] = None, connection_to_platform_map: Optional[Dict[str, Any]] = None, graph: Optional[DataHubGraph] = None, report: Optional[Any] = None, ) -> List[SchemaFieldClass]: """Extract fields from panel targets.""" - if targets is None: - return [] fields = [] for target in targets: @@ -264,11 +279,9 @@ def extract_time_format_fields(target: Dict[str, Any]) -> List[SchemaFieldClass] def get_fields_from_field_config( - field_config: Optional[Dict[str, Any]], + field_config: Dict[str, Any], ) -> List[SchemaFieldClass]: """Extract fields from field configuration.""" - if field_config is None: - return [] fields = [] defaults = field_config.get("defaults", {}) @@ -296,21 +309,21 @@ def get_fields_from_field_config( def get_fields_from_transformations( - transformations: Optional[List[Dict[str, Any]]], + transformations: List[Dict[str, Any]], ) -> List[SchemaFieldClass]: """Extract fields from transformations.""" - if transformations is None: - return [] fields = [] for transform in transformations: if transform.get("type") == "organize": for field_name in transform.get("options", {}).get("indexByName", {}): - fields.append( - SchemaFieldClass( - fieldPath=field_name, - type=SchemaFieldDataTypeClass(type=StringTypeClass()), - nativeDataType="transformed", + # Only create field if field_name is not empty + if field_name and field_name.strip(): + fields.append( + SchemaFieldClass( + fieldPath=field_name.strip(), + type=SchemaFieldDataTypeClass(type=StringTypeClass()), + nativeDataType="transformed", + ) ) - ) return fields diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index 40242e6067e51f..f7d1b5d387a1da 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -17,8 +17,9 @@ logger = logging.getLogger(__name__) +# Type aliases for Grafana data structures GrafanaQueryTarget = Dict[str, Any] -GrafanaFieldConfig = Dict[str, Any] +GrafanaFieldConfig = Dict[str, Any] # Never None, always a dict (possibly empty) GrafanaTransformation = Dict[str, Any] @@ -54,12 +55,28 @@ class Panel(_GrafanaBaseModel): field_config: GrafanaFieldConfig = Field(default_factory=dict, alias="fieldConfig") transformations: List[GrafanaTransformation] = Field(default_factory=list) + @property + def safe_field_config(self) -> GrafanaFieldConfig: + """Get field_config, guaranteed to be a dict (never None).""" + return self.field_config or {} + + @property + def safe_query_targets(self) -> List[GrafanaQueryTarget]: + """Get query_targets, guaranteed to be a list (never None).""" + return self.query_targets or [] + + @property + def safe_transformations(self) -> List[GrafanaTransformation]: + """Get transformations, guaranteed to be a list (never None).""" + return self.transformations or [] + @staticmethod def _ensure_dict_field( data: Dict[str, Any], field_name: str, default: Dict[str, Any] ) -> None: - """Ensure a field is a dict, converting None to the default dict.""" - if data.get(field_name) is None: + """Ensure a field is a dict, converting None/invalid types to the default dict.""" + value = data.get(field_name) + if value is None or not isinstance(value, dict): data[field_name] = default else: data.setdefault(field_name, default) @@ -68,8 +85,9 @@ def _ensure_dict_field( def _ensure_list_field( data: Dict[str, Any], field_name: str, default: List[Any] ) -> None: - """Ensure a field is a list, converting None to the default list.""" - if data.get(field_name) is None: + """Ensure a field is a list, converting None/invalid types to the default list.""" + value = data.get(field_name) + if value is None or not isinstance(value, list): data[field_name] = default else: data.setdefault(field_name, default) diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py index eacab847060c39..b306bfc06409f6 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py @@ -118,13 +118,32 @@ def test_get_fields_from_transformations(): assert field_paths == {"user", "value"} -def test_get_fields_from_field_config_none(): - """Test that get_fields_from_field_config handles None input gracefully.""" - fields = get_fields_from_field_config(None) - assert fields == [] - - def test_get_fields_from_field_config_empty(): """Test that get_fields_from_field_config handles empty dict input.""" fields = get_fields_from_field_config({}) assert fields == [] + + +def test_extract_fields_from_panel_with_empty_fields(): + """Test that extract_fields_from_panel handles panels with empty fields efficiently.""" + from datahub.ingestion.source.grafana.field_utils import extract_fields_from_panel + from datahub.ingestion.source.grafana.models import Panel + + # Create a text panel with no targets, fieldConfig, or transformations + panel_data = { + "id": 1, + "type": "text", + "title": "Test Panel", + } + + panel = Panel.model_validate(panel_data) + + # Verify safe properties work + assert panel.safe_field_config == {} + assert panel.safe_query_targets == [] + assert panel.safe_transformations == [] + + # Should extract no fields but not crash + fields = extract_fields_from_panel(panel) + assert isinstance(fields, list) + assert len(fields) == 0 # No fields to extract from a text panel From 50696b69e6fa8616de42f24744e3affa1664e45d Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 19:04:12 +0000 Subject: [PATCH 05/15] cleaning --- .../ingestion/source/grafana/field_utils.py | 15 ++-- .../ingestion/source/grafana/models.py | 6 +- .../unit/grafana/test_grafana_field_utils.py | 77 +++++++++++++++++++ 3 files changed, 87 insertions(+), 11 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py index ba3a8f5c1f3b93..b885f41b13a256 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py @@ -77,13 +77,15 @@ def extract_raw_sql_fields( schema_aware = False if panel and panel.datasource_ref and connection_to_platform_map: - ds_type = getattr(panel.datasource_ref, "type", None) or "unknown" - ds_uid = getattr(panel.datasource_ref, "uid", None) or "unknown" + ds_type = panel.datasource_ref.type + ds_uid = panel.datasource_ref.uid - # Try to find mapping by datasource UID first, then by type - platform_config = connection_to_platform_map.get( - ds_uid - ) or connection_to_platform_map.get(ds_type) + # Try to find mapping by datasource UID first (if it exists), then by type + platform_config = None + if ds_uid: + platform_config = connection_to_platform_map.get(ds_uid) + if not platform_config and ds_type: + platform_config = connection_to_platform_map.get(ds_type) if platform_config: platform = platform_config.platform @@ -282,7 +284,6 @@ def get_fields_from_field_config( field_config: Dict[str, Any], ) -> List[SchemaFieldClass]: """Extract fields from field configuration.""" - fields = [] defaults = field_config.get("defaults", {}) unit = defaults.get("unit") diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index f7d1b5d387a1da..c5be3bfbee958e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -78,8 +78,7 @@ def _ensure_dict_field( value = data.get(field_name) if value is None or not isinstance(value, dict): data[field_name] = default - else: - data.setdefault(field_name, default) + # If value exists and is already a dict, leave it as is @staticmethod def _ensure_list_field( @@ -89,8 +88,7 @@ def _ensure_list_field( value = data.get(field_name) if value is None or not isinstance(value, list): data[field_name] = default - else: - data.setdefault(field_name, default) + # If value exists and is already a list, leave it as is @staticmethod def _normalize_id_field(data: Dict[str, Any]) -> None: diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py index b306bfc06409f6..bfdb7a333a820f 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py @@ -147,3 +147,80 @@ def test_extract_fields_from_panel_with_empty_fields(): fields = extract_fields_from_panel(panel) assert isinstance(fields, list) assert len(fields) == 0 # No fields to extract from a text panel + + +def test_datasource_ref_field_access(): + """Test that datasource_ref fields are accessed correctly without getattr.""" + from datahub.ingestion.source.grafana.models import Panel + + # Test panel with datasource_ref having both type and uid + panel_with_datasource = Panel.model_validate( + { + "id": 1, + "type": "graph", + "datasource": {"type": "prometheus", "uid": "prometheus-uid-123"}, + } + ) + + # Verify we can access fields directly + assert panel_with_datasource.datasource_ref is not None + assert panel_with_datasource.datasource_ref.type == "prometheus" + assert panel_with_datasource.datasource_ref.uid == "prometheus-uid-123" + + # Test panel with partial datasource_ref (only type) + panel_with_partial_datasource = Panel.model_validate( + { + "id": 2, + "type": "graph", + "datasource": { + "type": "mysql" + # No uid field + }, + } + ) + + assert panel_with_partial_datasource.datasource_ref is not None + assert panel_with_partial_datasource.datasource_ref.type == "mysql" + assert panel_with_partial_datasource.datasource_ref.uid is None + + # Test panel with no datasource_ref + panel_without_datasource = Panel.model_validate( + { + "id": 3, + "type": "text", + # No datasource field + } + ) + + assert panel_without_datasource.datasource_ref is None + + +def test_extract_raw_sql_fields_with_text_panel(): + """Test that extract_raw_sql_fields handles text panels (no datasource) correctly.""" + from datahub.ingestion.source.grafana.field_utils import extract_raw_sql_fields + from datahub.ingestion.source.grafana.models import Panel + + # Create a text panel with no datasource (like in real Grafana) + text_panel = Panel.model_validate( + { + "id": 1, + "type": "text", + "title": "Text Panel", + # No datasource field - this is normal for text panels + } + ) + + # Should not crash when processing a target with rawSql but no datasource + target_with_sql = {"rawSql": "SELECT * FROM test_table"} + + # This should work without error, even though panel.datasource_ref is None + fields = extract_raw_sql_fields( + target=target_with_sql, + panel=text_panel, + connection_to_platform_map={"postgres": "some_config"}, + graph=None, + report=None, + ) + + # Should return some fields (fallback parsing) or empty list, but not crash + assert isinstance(fields, list) From 97ae68e8d75bd079481fc44031c0caa8b1862558 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 19:17:34 +0000 Subject: [PATCH 06/15] defensive checks --- .../ingestion/source/grafana/field_utils.py | 15 ++++--- .../ingestion/source/grafana/models.py | 14 ++++++- .../unit/grafana/test_grafana_field_utils.py | 6 +++ .../unit/grafana/test_grafana_validation.py | 39 +++++++++++++++++++ 4 files changed, 66 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py index b885f41b13a256..3b7d318d78bb91 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py @@ -221,11 +221,10 @@ def extract_fields_from_panel( if target_fields: fields.extend(target_fields) - # Extract fields from field config (only if there's meaningful config) - if panel.safe_field_config: - field_config_fields = get_fields_from_field_config(panel.safe_field_config) - if field_config_fields: - fields.extend(field_config_fields) + # Extract fields from field config + field_config_fields = get_fields_from_field_config(panel.field_config) + if field_config_fields: + fields.extend(field_config_fields) # Extract fields from transformations (only if there are transformations) if panel.safe_transformations: @@ -281,9 +280,12 @@ def extract_time_format_fields(target: Dict[str, Any]) -> List[SchemaFieldClass] def get_fields_from_field_config( - field_config: Dict[str, Any], + field_config: Optional[Dict[str, Any]], ) -> List[SchemaFieldClass]: """Extract fields from field configuration.""" + if field_config is None: + return [] + fields = [] defaults = field_config.get("defaults", {}) unit = defaults.get("unit") @@ -295,6 +297,7 @@ def get_fields_from_field_config( nativeDataType="value", ) ) + for override in field_config.get("overrides", []): if override.get("matcher", {}).get("id") == "byName": field_name = override.get("matcher", {}).get("options") diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index c5be3bfbee958e..026bc84ba53de2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -103,8 +103,18 @@ def ensure_panel_defaults(cls, data: Any) -> Dict[str, Any]: if isinstance(data, dict): result = dict(data) - # Set basic defaults - result.setdefault("description", "") + # Set basic defaults - handle None values + if result.get("description") is None: + result["description"] = "" + + # Handle datasource field - convert invalid types to None + datasource = result.get("datasource") + if isinstance(datasource, str): + # Handle template variables like '$datasource' or other string values + result["datasource"] = None + elif datasource is not None and not isinstance(datasource, dict): + # Handle any other invalid types + result["datasource"] = None # Ensure complex fields are never None cls._ensure_list_field(result, "targets", []) diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py index bfdb7a333a820f..5d9774904c9715 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py @@ -124,6 +124,12 @@ def test_get_fields_from_field_config_empty(): assert fields == [] +def test_get_fields_from_field_config_none(): + """Test that get_fields_from_field_config handles None input gracefully.""" + fields = get_fields_from_field_config(None) + assert fields == [] + + def test_extract_fields_from_panel_with_empty_fields(): """Test that extract_fields_from_panel handles panels with empty fields efficiently.""" from datahub.ingestion.source.grafana.field_utils import extract_fields_from_panel diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py index 37f689169beb00..9ddcf4b69d0e61 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py @@ -218,6 +218,45 @@ def test_text_panel_like_real_grafana(): assert panel.datasource_ref is None +def test_panel_with_null_description(): + """Test that panels with explicit None description are handled correctly.""" + panel_data = { + "id": 18, + "type": "timeseries", + "title": "Test Panel", + "description": None, # Explicit None value from Grafana API + "datasource": {"type": "prometheus", "uid": "test-uid"}, + "fieldConfig": {"defaults": {"unit": "short"}}, + "targets": [{"expr": "up"}], + } + + # Should validate successfully with None converted to empty string + panel = Panel.model_validate(panel_data) + assert panel.description == "" # None converted to empty string + assert panel.id == "18" + assert panel.type == "timeseries" + assert panel.title == "Test Panel" + + +def test_panel_with_string_datasource(): + """Test that panels with string datasource (template variables) are handled correctly.""" + panel_data = { + "id": 35, + "type": "timeseries", + "title": "Sent records", + "datasource": "$datasource", # String template variable from Grafana + "fieldConfig": {"defaults": {"unit": "short"}}, + "targets": [{"datasource": "$datasource", "expr": "up"}], + } + + # Should validate successfully with string datasource converted to None + panel = Panel.model_validate(panel_data) + assert panel.datasource_ref is None # String converted to None + assert panel.id == "35" + assert panel.type == "timeseries" + assert panel.title == "Sent records" + + def test_realistic_grafana_api_response(): """Test validation with a realistic Grafana API response format.""" # Simulates a typical Grafana API response with some optional fields missing From 302b3e31bb4c5336ef06d10887d94d301deca604 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 19:32:50 +0000 Subject: [PATCH 07/15] catering for no panel id --- .../ingestion/source/grafana/models.py | 11 ++++-- .../tests/unit/grafana/test_grafana_models.py | 14 +++++--- .../unit/grafana/test_grafana_validation.py | 35 +++++++++++++++++++ 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index 026bc84ba53de2..fe4da6b35259b0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -92,8 +92,15 @@ def _ensure_list_field( @staticmethod def _normalize_id_field(data: Dict[str, Any]) -> None: - """Convert integer ID to string for consistency.""" - if "id" in data and isinstance(data["id"], int): + """Convert integer ID to string and generate fallback ID if missing.""" + if "id" not in data or data["id"] is None: + # Generate a fallback ID based on panel type and title + panel_type = data.get("type", "unknown") + title = data.get("title", "untitled") + # Create a simple hash-like ID from type and title + fallback_id = f"{panel_type}_{hash(title) % 10000}" + data["id"] = fallback_id + elif isinstance(data["id"], int): data["id"] = str(data["id"]) @model_validator(mode="before") diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py index 7e30eaaaa3e859..08d7883ae06952 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_models.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_models.py @@ -130,7 +130,7 @@ def test_dashboard_with_text_panel(): def test_dashboard_with_invalid_panel_skips(): - """Test that invalid panels are skipped with a warning instead of failing.""" + """Test that panels with missing IDs get generated fallback IDs.""" dashboard_data = { "dashboard": { "uid": "dash1", @@ -139,7 +139,7 @@ def test_dashboard_with_invalid_panel_skips(): "version": "1", "panels": [ {"id": "1", "title": "Valid Panel", "type": "graph"}, - # Missing required 'id' field - should be skipped + # Missing 'id' field - should get generated ID {"title": "Invalid Panel", "type": "graph"}, {"id": "3", "title": "Another Valid Panel", "type": "table"}, ], @@ -148,10 +148,14 @@ def test_dashboard_with_invalid_panel_skips(): } dashboard = Dashboard.model_validate(dashboard_data) - # Should have 2 panels (invalid one is skipped) - assert len(dashboard.panels) == 2 + # Should have 3 panels (missing ID gets generated) + assert len(dashboard.panels) == 3 assert dashboard.panels[0].title == "Valid Panel" - assert dashboard.panels[1].title == "Another Valid Panel" + assert dashboard.panels[0].id == "1" + assert dashboard.panels[1].title == "Invalid Panel" + assert dashboard.panels[1].id.startswith("graph_") # Generated ID + assert dashboard.panels[2].title == "Another Valid Panel" + assert dashboard.panels[2].id == "3" def test_dashboard_skip_text_panels(): diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py index 9ddcf4b69d0e61..78243ae5ef192c 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py @@ -257,6 +257,41 @@ def test_panel_with_string_datasource(): assert panel.title == "Sent records" +def test_panel_with_missing_id(): + """Test that panels with missing id field get a generated fallback ID.""" + panel_data = { + # No id field - common in some text panels + "type": "text", + "title": "Performance Testing (k6)", + "options": {"content": "# Performance Testing", "mode": "markdown"}, + "gridPos": {"h": 4, "w": 24, "x": 0, "y": 0}, + } + + # Should validate successfully with generated ID + panel = Panel.model_validate(panel_data) + assert panel.id is not None # ID was generated + assert panel.id.startswith("text_") # Generated based on type + assert panel.type == "text" + assert panel.title == "Performance Testing (k6)" + + +def test_panel_with_null_id(): + """Test that panels with explicit None id field get a generated fallback ID.""" + panel_data = { + "id": None, # Explicit None + "type": "text", + "title": "Test Panel", + "options": {"content": "Test content"}, + } + + # Should validate successfully with generated ID + panel = Panel.model_validate(panel_data) + assert panel.id is not None # ID was generated + assert panel.id.startswith("text_") # Generated based on type + assert panel.type == "text" + assert panel.title == "Test Panel" + + def test_realistic_grafana_api_response(): """Test validation with a realistic Grafana API response format.""" # Simulates a typical Grafana API response with some optional fields missing From e2a35b8e7dacace9465d8aae313c3c1fb3eff6d9 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 19:36:02 +0000 Subject: [PATCH 08/15] deterministic fallback for multiple panels with no id --- .../ingestion/source/grafana/models.py | 26 ++++++++- .../unit/grafana/test_grafana_validation.py | 56 +++++++++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py index fe4da6b35259b0..86a55c6bb437a9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/models.py @@ -94,11 +94,31 @@ def _ensure_list_field( def _normalize_id_field(data: Dict[str, Any]) -> None: """Convert integer ID to string and generate fallback ID if missing.""" if "id" not in data or data["id"] is None: - # Generate a fallback ID based on panel type and title + # Generate a deterministic fallback ID based on multiple panel properties panel_type = data.get("type", "unknown") title = data.get("title", "untitled") - # Create a simple hash-like ID from type and title - fallback_id = f"{panel_type}_{hash(title) % 10000}" + + # Include additional properties for uniqueness + grid_pos = data.get("gridPos", {}) + x = grid_pos.get("x", 0) + y = grid_pos.get("y", 0) + w = grid_pos.get("w", 0) + h = grid_pos.get("h", 0) + + # Create a deterministic identifier from multiple properties + # This ensures uniqueness even for panels with identical type/title + identifier_parts = [ + panel_type, + title, + str(x), + str(y), + str(w), + str(h), # Grid position for uniqueness + ] + identifier_string = "_".join(identifier_parts) + + # Use hash for consistent ID generation across runs + fallback_id = f"{panel_type}_{abs(hash(identifier_string)) % 100000}" data["id"] = fallback_id elif isinstance(data["id"], int): data["id"] = str(data["id"]) diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py index 78243ae5ef192c..ae741e44e93528 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_validation.py @@ -292,6 +292,62 @@ def test_panel_with_null_id(): assert panel.title == "Test Panel" +def test_multiple_panels_without_id_or_title(): + """Test that multiple panels without ID or title get unique, deterministic IDs.""" + # Two panels with no ID, no title, but different grid positions + panel1_data = { + "type": "text", + "gridPos": {"x": 0, "y": 0, "w": 12, "h": 4}, + "options": {"content": "Panel 1"}, + } + + panel2_data = { + "type": "text", + "gridPos": {"x": 12, "y": 0, "w": 12, "h": 4}, + "options": {"content": "Panel 2"}, + } + + # Validate both panels + panel1 = Panel.model_validate(panel1_data) + panel2 = Panel.model_validate(panel2_data) + + # Both should have generated IDs + assert panel1.id is not None + assert panel2.id is not None + assert panel1.id.startswith("text_") + assert panel2.id.startswith("text_") + + # IDs should be different due to different grid positions + assert panel1.id != panel2.id + + # IDs should be deterministic - same input produces same ID + panel1_duplicate = Panel.model_validate(panel1_data) + panel2_duplicate = Panel.model_validate(panel2_data) + assert panel1.id == panel1_duplicate.id + assert panel2.id == panel2_duplicate.id + + +def test_panels_identical_except_position(): + """Test that identical panels in different positions get different IDs.""" + base_panel_data = { + "type": "text", + "title": "Same Title", + "options": {"content": "Same content"}, + } + + # Same panel in different positions + panel1_data = {**base_panel_data, "gridPos": {"x": 0, "y": 0, "w": 12, "h": 4}} + panel2_data = {**base_panel_data, "gridPos": {"x": 0, "y": 4, "w": 12, "h": 4}} + + panel1 = Panel.model_validate(panel1_data) + panel2 = Panel.model_validate(panel2_data) + + # Should have different IDs due to different positions + assert panel1.id != panel2.id + assert panel1.id.startswith("text_") + assert panel2.id.startswith("text_") + + def test_realistic_grafana_api_response(): """Test validation with a realistic Grafana API response format.""" # Simulates a typical Grafana API response with some optional fields missing From afad92b95b11ec0308d34ac6aecb0593fecfab5b Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 19:47:24 +0000 Subject: [PATCH 09/15] guarantees that string config is always populated --- .../ingestion/source/grafana/field_utils.py | 22 ++++++++++++--- .../unit/grafana/test_grafana_field_utils.py | 28 +++++++++++++++++-- 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py index 3b7d318d78bb91..e4dcf5680aff0c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py @@ -221,8 +221,8 @@ def extract_fields_from_panel( if target_fields: fields.extend(target_fields) - # Extract fields from field config - field_config_fields = get_fields_from_field_config(panel.field_config) + # Extract fields from field config - use safe property to ensure non-None + field_config_fields = get_fields_from_field_config(panel.safe_field_config) if field_config_fields: fields.extend(field_config_fields) @@ -280,10 +280,15 @@ def extract_time_format_fields(target: Dict[str, Any]) -> List[SchemaFieldClass] def get_fields_from_field_config( - field_config: Optional[Dict[str, Any]], + field_config: Dict[str, Any], ) -> List[SchemaFieldClass]: """Extract fields from field configuration.""" + # Ultimate safety check - this should never happen but let's catch it with detailed info if field_config is None: + logger.error( + f"CRITICAL: get_fields_from_field_config received None field_config. " + f"This indicates a serious bug. Type: {type(field_config)}, Value: {field_config}" + ) return [] fields = [] @@ -298,7 +303,16 @@ def get_fields_from_field_config( ) ) - for override in field_config.get("overrides", []): + # Additional safety check before iterating + overrides = field_config.get("overrides", []) + if overrides is None: + logger.error( + f"CRITICAL: field_config.get('overrides', []) returned None. " + f"field_config type: {type(field_config)}, field_config: {field_config}" + ) + overrides = [] + + for override in overrides: if override.get("matcher", {}).get("id") == "byName": field_name = override.get("matcher", {}).get("options") if field_name: diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py index 5d9774904c9715..faa45fc4bd6a1e 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py @@ -125,11 +125,35 @@ def test_get_fields_from_field_config_empty(): def test_get_fields_from_field_config_none(): - """Test that get_fields_from_field_config handles None input gracefully.""" - fields = get_fields_from_field_config(None) + """Test that get_fields_from_field_config handles empty dict input (via safe property).""" + # The safe_field_config property ensures we never pass None to this function + fields = get_fields_from_field_config({}) assert fields == [] +def test_panel_safe_field_config_property(): + """Test that Panel.safe_field_config always returns a dict, never None.""" + from datahub.ingestion.source.grafana.models import Panel + + # Test with explicit None field_config (should be converted by model validator) + panel_data = { + "id": "test1", + "type": "text", + "fieldConfig": None, # This should be converted to {} by the model validator + } + + panel = Panel.model_validate(panel_data) + + # The safe_field_config property should always return a dict + safe_config = panel.safe_field_config + assert isinstance(safe_config, dict) + assert safe_config == {} # Should be empty dict, not None + + # The actual field_config should also be a dict after validation + assert isinstance(panel.field_config, dict) + assert panel.field_config == {} + + def test_extract_fields_from_panel_with_empty_fields(): """Test that extract_fields_from_panel handles panels with empty fields efficiently.""" from datahub.ingestion.source.grafana.field_utils import extract_fields_from_panel From 072c7d3d3f0fd16e5b2c2de5d93879ba681e8d81 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 19:58:50 +0000 Subject: [PATCH 10/15] Update field_utils.py --- .../ingestion/source/grafana/field_utils.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py index e4dcf5680aff0c..3cb35f4344af5a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py @@ -283,14 +283,6 @@ def get_fields_from_field_config( field_config: Dict[str, Any], ) -> List[SchemaFieldClass]: """Extract fields from field configuration.""" - # Ultimate safety check - this should never happen but let's catch it with detailed info - if field_config is None: - logger.error( - f"CRITICAL: get_fields_from_field_config received None field_config. " - f"This indicates a serious bug. Type: {type(field_config)}, Value: {field_config}" - ) - return [] - fields = [] defaults = field_config.get("defaults", {}) unit = defaults.get("unit") @@ -303,16 +295,7 @@ def get_fields_from_field_config( ) ) - # Additional safety check before iterating - overrides = field_config.get("overrides", []) - if overrides is None: - logger.error( - f"CRITICAL: field_config.get('overrides', []) returned None. " - f"field_config type: {type(field_config)}, field_config: {field_config}" - ) - overrides = [] - - for override in overrides: + for override in field_config.get("overrides", []): if override.get("matcher", {}).get("id") == "byName": field_name = override.get("matcher", {}).get("options") if field_name: From 32b3e9dbe0447a8f7fb31c21258407d291ff527e Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Tue, 18 Nov 2025 20:08:39 +0000 Subject: [PATCH 11/15] Update field_utils.py --- .../src/datahub/ingestion/source/grafana/field_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py index 3cb35f4344af5a..15c6097b6f3cdd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py @@ -295,7 +295,8 @@ def get_fields_from_field_config( ) ) - for override in field_config.get("overrides", []): + overrides = field_config.get("overrides") or [] + for override in overrides: if override.get("matcher", {}).get("id") == "byName": field_name = override.get("matcher", {}).get("options") if field_name: From cfaa0e7d1a31326f00ec607c2a860aafd5db05d0 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Wed, 19 Nov 2025 16:54:37 +0000 Subject: [PATCH 12/15] rawSQL fixes --- .../ingestion/source/grafana/field_utils.py | 18 ++++++++++++--- .../ingestion/source/grafana/lineage.py | 6 +++-- .../unit/grafana/test_grafana_field_utils.py | 23 +++++++++++++++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py index 15c6097b6f3cdd..5ad5422fc59516 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/field_utils.py @@ -64,7 +64,13 @@ def extract_raw_sql_fields( report: Optional[Any] = None, ) -> List[SchemaFieldClass]: """Extract fields from raw SQL queries using DataHub's SQL parsing.""" - raw_sql = target.get("rawSql", "") + # Handle case variations: rawSql, rawSQL, etc. + raw_sql = "" + for key, value in target.items(): + if key.lower() == "rawsql" and value: + raw_sql = value + break + if not raw_sql: return [] @@ -143,7 +149,13 @@ def extract_raw_sql_fields( def _extract_raw_sql_fields_fallback(target: Dict[str, Any]) -> List[SchemaFieldClass]: """Fallback basic SQL parsing for when sqlglot fails.""" - raw_sql = target.get("rawSql", "").lower() + # Handle case variations: rawSql, rawSQL, etc. + raw_sql = "" + for key, value in target.items(): + if key.lower() == "rawsql" and value: + raw_sql = value + break + if not raw_sql: return [] @@ -200,7 +212,7 @@ def _extract_raw_sql_fields_fallback(target: Dict[str, Any]) -> List[SchemaField return fields except (IndexError, ValueError, StopIteration) as e: - logger.warning(f"Failed to parse SQL: {target.get('rawSql')}", e) + logger.warning(f"Failed to parse SQL: {raw_sql}", e) return [] diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py index 57bca941690d19..ace1ed4f93e74f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py @@ -89,8 +89,10 @@ def _extract_raw_sql( ) -> Optional[str]: """Extract raw SQL from panel query targets.""" for target in query_targets: - if target.get("rawSql"): - return target["rawSql"] + # Handle case variations: rawSql, rawSQL, etc. + for key, value in target.items(): + if key.lower() == "rawsql" and value: + return value return None def _build_dataset_urn(self, ds_type: str, ds_uid: str, panel_id: str) -> str: diff --git a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py index faa45fc4bd6a1e..48fb7dcf4ef953 100644 --- a/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py +++ b/metadata-ingestion/tests/unit/grafana/test_grafana_field_utils.py @@ -67,6 +67,29 @@ def test_extract_raw_sql_fields(): assert fields[1].fieldPath == "request_count" +def test_extract_raw_sql_fields_case_insensitive(): + """Test that extract_raw_sql_fields handles different case variations of rawSql.""" + sql_query = "SELECT name as user_name, count as request_count FROM requests" + + # Test different case variations + target_rawSql = {"rawSql": sql_query} + target_rawSQL = {"rawSQL": sql_query} + target_mixed = {"RawSQL": sql_query} + + fields1 = extract_raw_sql_fields(target_rawSql) + fields2 = extract_raw_sql_fields(target_rawSQL) + fields3 = extract_raw_sql_fields(target_mixed) + + # All should extract the same fields + assert len(fields1) == 2 + assert len(fields2) == 2 + assert len(fields3) == 2 + + # Field names should be the same + assert fields1[0].fieldPath == fields2[0].fieldPath == fields3[0].fieldPath + assert fields1[1].fieldPath == fields2[1].fieldPath == fields3[1].fieldPath + + def test_extract_raw_sql_fields_invalid(caplog): # Test with completely invalid SQL target = {"rawSql": "INVALID SQL"} From d6642268023b2033511e105ec6ff7a5b3bd5d3a9 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Thu, 20 Nov 2025 19:04:00 +0000 Subject: [PATCH 13/15] Update grafana_config.py --- .../src/datahub/ingestion/source/grafana/grafana_config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py index b897e25daf0a8a..ab576985f2cc86 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_config.py @@ -8,6 +8,9 @@ EnvConfigMixin, PlatformInstanceConfigMixin, ) +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StatefulStaleMetadataRemovalConfig, +) from datahub.ingestion.source.state.stateful_ingestion_base import ( StatefulIngestionConfigBase, ) @@ -104,6 +107,10 @@ class GrafanaSourceConfig( description="Map of Grafana datasource types/UIDs to platform connection configs for lineage extraction", ) + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( + default=None, description="Stateful ingestion configuration" + ) + @field_validator("url", mode="after") @classmethod def remove_trailing_slash(cls, v: str) -> str: From d8be1667dbc6dd05add87040343402fe51196ae7 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Thu, 20 Nov 2025 21:01:57 +0000 Subject: [PATCH 14/15] Update lineage.py --- .../ingestion/source/grafana/lineage.py | 41 ++++++++++--------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py index ace1ed4f93e74f..f66e74804a2e6d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/lineage.py @@ -166,28 +166,31 @@ def _create_column_lineage( dataset_urn: str, parsed_sql: SqlParsingResult, ) -> Optional[MetadataChangeProposalWrapper]: - """Create column-level lineage""" - if not parsed_sql.column_lineage or not self.include_column_lineage: + """Create column-level lineage and dataset-level lineage from parsed SQL""" + # Always create dataset-level lineage if we have upstream tables + if not parsed_sql.in_tables: return None upstream_lineages = [] - for col_lineage in parsed_sql.column_lineage: - upstream_lineages.append( - FineGrainedLineageClass( - downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD, - downstreams=[ - make_schema_field_urn( - dataset_urn, col_lineage.downstream.column - ) - ], - upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET, - upstreams=[ - make_schema_field_urn(upstream_dataset, col.column) - for col in col_lineage.upstreams - for upstream_dataset in parsed_sql.in_tables - ], + # Add column-level lineage if available and enabled + if parsed_sql.column_lineage and self.include_column_lineage: + for col_lineage in parsed_sql.column_lineage: + upstream_lineages.append( + FineGrainedLineageClass( + downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD, + downstreams=[ + make_schema_field_urn( + dataset_urn, col_lineage.downstream.column + ) + ], + upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET, + upstreams=[ + make_schema_field_urn(upstream_dataset, col.column) + for col in col_lineage.upstreams + for upstream_dataset in parsed_sql.in_tables + ], + ) ) - ) return MetadataChangeProposalWrapper( entityUrn=dataset_urn, @@ -199,6 +202,6 @@ def _create_column_lineage( ) for table in parsed_sql.in_tables ], - fineGrainedLineages=upstream_lineages, + fineGrainedLineages=upstream_lineages if upstream_lineages else None, ), ) From 475e8a51b6e4908d3badfc006eeb5e162273bc84 Mon Sep 17 00:00:00 2001 From: Jonny Dixon Date: Fri, 21 Nov 2025 18:01:41 +0000 Subject: [PATCH 15/15] fixing snapshot lineage overwrite --- .../source/grafana/grafana_source.py | 90 +- .../grafana/grafana_basic_mcps_golden.json | 760 ++++++------ .../grafana/grafana_mcps_golden.json | 1014 ++++++++++------- 3 files changed, 1078 insertions(+), 786 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py index 99cb796de7233e..23a779aede623c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/grafana/grafana_source.py @@ -61,11 +61,9 @@ DashboardInfoClass, DataPlatformInstanceClass, DatasetPropertiesClass, - DatasetSnapshotClass, GlobalTagsClass, InputFieldClass, InputFieldsClass, - MetadataChangeEventClass, OtherSchemaClass, SchemaFieldClass, SchemaMetadataClass, @@ -497,47 +495,58 @@ def _process_panel_dataset( env=self.env, ) - # Create dataset snapshot - dataset_snapshot = DatasetSnapshotClass( - urn=dataset_urn, - aspects=[ - DataPlatformInstanceClass( - platform=make_data_platform_urn(self.platform), - instance=make_dataplatform_instance_urn( - platform=self.platform, - instance=self.platform_instance, - ) - if self.platform_instance - else None, - ), - DatasetPropertiesClass( - name=f"{ds_uid} ({panel.title or panel.id})", - description="", - customProperties={ - "type": ds_type, - "uid": ds_uid, - "full_path": dataset_name, - }, - ), - StatusClass(removed=False), - ], - ) + # Platform instance aspect + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=DataPlatformInstanceClass( + platform=make_data_platform_urn(self.platform), + instance=make_dataplatform_instance_urn( + platform=self.platform, + instance=self.platform_instance, + ) + if self.platform_instance + else None, + ), + ).as_workunit() + + # Dataset properties aspect + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=DatasetPropertiesClass( + name=f"{ds_uid} ({panel.title or panel.id})", + description="", + customProperties={ + "type": ds_type, + "uid": ds_uid, + "full_path": dataset_name, + }, + ), + ).as_workunit() + + # Status aspect + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=StatusClass(removed=False), + ).as_workunit() # Add schema metadata if available schema_fields = extract_fields_from_panel( panel, self.config.connection_to_platform_map, self.ctx.graph, self.report ) if schema_fields: - schema_metadata = SchemaMetadataClass( - schemaName=f"{ds_type}.{ds_uid}.{panel.id}", - platform=make_data_platform_urn(self.platform), - version=0, - fields=schema_fields, - hash="", - platformSchema=OtherSchemaClass(rawSchema=""), - ) - dataset_snapshot.aspects.append(schema_metadata) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=SchemaMetadataClass( + schemaName=f"{ds_type}.{ds_uid}.{panel.id}", + platform=make_data_platform_urn(self.platform), + version=0, + fields=schema_fields, + hash="", + platformSchema=OtherSchemaClass(rawSchema=""), + ), + ).as_workunit() + # Add tags if available if dashboard_uid and self.config.ingest_tags: dashboard = self.api_client.get_dashboard(dashboard_uid) if dashboard and dashboard.tags: @@ -546,13 +555,12 @@ def _process_panel_dataset( tags.append(TagAssociationClass(tag=make_tag_urn(tag))) if tags: - dataset_snapshot.aspects.append(GlobalTagsClass(tags=tags)) + yield MetadataChangeProposalWrapper( + entityUrn=dataset_urn, + aspect=GlobalTagsClass(tags=tags), + ).as_workunit() self.report.report_dataset_scanned() - yield MetadataWorkUnit( - id=f"grafana-dataset-{ds_uid}-{panel.id}", - mce=MetadataChangeEventClass(proposedSnapshot=dataset_snapshot), - ) # Add dataset to dashboard container if dashboard_uid: diff --git a/metadata-ingestion/tests/integration/grafana/grafana_basic_mcps_golden.json b/metadata-ingestion/tests/integration/grafana/grafana_basic_mcps_golden.json index afe2386d391a14..572bdfb1f721a3 100644 --- a/metadata-ingestion/tests/integration/grafana/grafana_basic_mcps_golden.json +++ b/metadata-ingestion/tests/integration/grafana/grafana_basic_mcps_golden.json @@ -188,102 +188,132 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)", - "aspects": [ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:grafana" + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "type": "postgres", + "uid": "test-postgres", + "full_path": "postgres.test-postgres.2" + }, + "name": "test-postgres (Response Times by Dimension)", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "postgres.test-postgres.2", + "platform": "urn:li:dataPlatform:grafana", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.common.DataPlatformInstance": { - "platform": "urn:li:dataPlatform:grafana" - } + "fieldPath": "value", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "value", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "type": "postgres", - "uid": "test-postgres", - "full_path": "postgres.test-postgres.2" - }, - "name": "test-postgres (Response Times by Dimension)", - "description": "", - "tags": [] - } + "fieldPath": "time", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "sql_column", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } + "fieldPath": "dimension", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "sql_column", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "postgres.test-postgres.2", - "platform": "urn:li:dataPlatform:grafana", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "value", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "value", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "time", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "sql_column", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "dimension", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "sql_column", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "value_ms", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "value", - "recursive": false, - "isPartOfKey": false - } - ] - } + "fieldPath": "value_ms", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "value", + "recursive": false, + "isPartOfKey": false } ] } @@ -375,7 +405,7 @@ "datasourceType": "postgres", "datasourceUid": "test-postgres", "description": "Response times tracked across different dimensions", - "queryCount": "1" + "targetsCount": "1" }, "title": "Response Times by Dimension", "description": "Response times tracked across different dimensions", @@ -549,102 +579,132 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)", - "aspects": [ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:grafana" + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "type": "postgres", + "uid": "test-postgres", + "full_path": "postgres.test-postgres.4" + }, + "name": "test-postgres (Recent Metrics)", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "postgres.test-postgres.4", + "platform": "urn:li:dataPlatform:grafana", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.common.DataPlatformInstance": { - "platform": "urn:li:dataPlatform:grafana" - } + "fieldPath": "metric", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "type": "postgres", - "uid": "test-postgres", - "full_path": "postgres.test-postgres.4" - }, - "name": "test-postgres (Recent Metrics)", - "description": "", - "tags": [] - } + "fieldPath": "value", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } + "fieldPath": "dimension", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "postgres.test-postgres.4", - "platform": "urn:li:dataPlatform:grafana", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "metric", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "value", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "number", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "dimension", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "time", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "sql_column", - "recursive": false, - "isPartOfKey": false - } - ] - } + "fieldPath": "time", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "sql_column", + "recursive": false, + "isPartOfKey": false } ] } @@ -736,7 +796,7 @@ "datasourceType": "postgres", "datasourceUid": "test-postgres", "description": "Recent metrics from all sources", - "queryCount": "1" + "targetsCount": "1" }, "title": "Recent Metrics", "description": "Recent metrics from all sources", @@ -955,78 +1015,108 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.DataPlatformInstance": { - "platform": "urn:li:dataPlatform:grafana" - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "type": "postgres", - "uid": "test-postgres", - "full_path": "postgres.test-postgres.5" - }, - "name": "test-postgres (Total Metrics Count)", - "description": "", - "tags": [] - } - }, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:grafana" + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "type": "postgres", + "uid": "test-postgres", + "full_path": "postgres.test-postgres.5" + }, + "name": "test-postgres (Total Metrics Count)", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "postgres.test-postgres.5", + "platform": "urn:li:dataPlatform:grafana", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } + "fieldPath": "count", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "postgres.test-postgres.5", - "platform": "urn:li:dataPlatform:grafana", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "count", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "number", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "time", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "timestamp", - "recursive": false, - "isPartOfKey": false - } - ] - } + "fieldPath": "time", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp", + "recursive": false, + "isPartOfKey": false } ] } @@ -1118,7 +1208,7 @@ "datasourceType": "postgres", "datasourceUid": "test-postgres", "description": "Total number of metrics collected", - "queryCount": "1" + "targetsCount": "1" }, "title": "Total Metrics Count", "description": "Total number of metrics collected", @@ -1247,78 +1337,108 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.DataPlatformInstance": { - "platform": "urn:li:dataPlatform:grafana" - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "type": "prometheus", - "uid": "test-prometheus", - "full_path": "prometheus.test-prometheus.6" - }, - "name": "test-prometheus (System Metrics)", - "description": "", - "tags": [] - } - }, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:grafana" + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "type": "prometheus", + "uid": "test-prometheus", + "full_path": "prometheus.test-prometheus.6" + }, + "name": "test-prometheus (System Metrics)", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "prometheus.test-prometheus.6", + "platform": "urn:li:dataPlatform:grafana", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } + "fieldPath": "CPU Usage", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "prometheus_metric", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "prometheus.test-prometheus.6", - "platform": "urn:li:dataPlatform:grafana", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "CPU Usage", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "prometheus_metric", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "Memory Usage", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "prometheus_metric", - "recursive": false, - "isPartOfKey": false - } - ] - } + "fieldPath": "Memory Usage", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "prometheus_metric", + "recursive": false, + "isPartOfKey": false } ] } @@ -1410,7 +1530,7 @@ "datasourceType": "prometheus", "datasourceUid": "test-prometheus", "description": "Prometheus system metrics", - "queryCount": "2" + "targetsCount": "2" }, "title": "System Metrics", "description": "Prometheus system metrics", diff --git a/metadata-ingestion/tests/integration/grafana/grafana_mcps_golden.json b/metadata-ingestion/tests/integration/grafana/grafana_mcps_golden.json index ce1d98601c8043..7145f4d325da95 100644 --- a/metadata-ingestion/tests/integration/grafana/grafana_mcps_golden.json +++ b/metadata-ingestion/tests/integration/grafana/grafana_mcps_golden.json @@ -223,115 +223,133 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.DataPlatformInstance": { - "platform": "urn:li:dataPlatform:grafana", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)" - } - }, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:grafana", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)" + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "type": "postgres", + "uid": "test-postgres", + "full_path": "postgres.test-postgres.2" + }, + "name": "test-postgres (Response Times by Dimension)", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "postgres.test-postgres.2", + "platform": "urn:li:dataPlatform:grafana", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "type": "postgres", - "uid": "test-postgres", - "full_path": "postgres.test-postgres.2" - }, - "name": "test-postgres (Response Times by Dimension)", - "description": "", - "tags": [] - } + "fieldPath": "value", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "value", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } + "fieldPath": "time", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NullType": {} + } + }, + "nativeDataType": "", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "postgres.test-postgres.2", - "platform": "urn:li:dataPlatform:grafana", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "value", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "value", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "time", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "dimension", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "value_ms", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "value", - "recursive": false, - "isPartOfKey": false - } - ] - } + "fieldPath": "dimension", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NullType": {} + } + }, + "nativeDataType": "", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [ - { - "tag": "urn:li:tag:test-tag" - }, - { - "tag": "urn:li:tag:integration-test" - } - ] - } + "fieldPath": "value_ms", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "value", + "recursive": false, + "isPartOfKey": false } ] } @@ -346,10 +364,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "globalTags", "aspect": { "json": { - "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810" + "tags": [ + { + "tag": "urn:li:tag:test-tag" + }, + { + "tag": "urn:li:tag:integration-test" + } + ] } }, "systemMetadata": { @@ -362,19 +387,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "container", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)", - "type": "TRANSFORMED" - } - ] + "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810" } }, "systemMetadata": { @@ -453,7 +469,7 @@ "datasourceType": "postgres", "datasourceUid": "test-postgres", "description": "Response times tracked across different dimensions", - "queryCount": "1" + "targetsCount": "1" }, "title": "Response Times by Dimension", "description": "Response times tracked across different dimensions", @@ -654,115 +670,133 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.DataPlatformInstance": { - "platform": "urn:li:dataPlatform:grafana", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)" - } - }, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:grafana", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)" + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "type": "postgres", + "uid": "test-postgres", + "full_path": "postgres.test-postgres.4" + }, + "name": "test-postgres (Recent Metrics)", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "postgres.test-postgres.4", + "platform": "urn:li:dataPlatform:grafana", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "type": "postgres", - "uid": "test-postgres", - "full_path": "postgres.test-postgres.4" - }, - "name": "test-postgres (Recent Metrics)", - "description": "", - "tags": [] - } + "fieldPath": "metric", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } + "fieldPath": "value", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "postgres.test-postgres.4", - "platform": "urn:li:dataPlatform:grafana", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "metric", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "value", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "number", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "dimension", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "time", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} - } - }, - "nativeDataType": "", - "recursive": false, - "isPartOfKey": false - } - ] - } + "fieldPath": "dimension", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [ - { - "tag": "urn:li:tag:test-tag" - }, - { - "tag": "urn:li:tag:integration-test" - } - ] - } + "fieldPath": "time", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NullType": {} + } + }, + "nativeDataType": "", + "recursive": false, + "isPartOfKey": false } ] } @@ -777,10 +811,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "globalTags", "aspect": { "json": { - "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810" + "tags": [ + { + "tag": "urn:li:tag:test-tag" + }, + { + "tag": "urn:li:tag:integration-test" + } + ] } }, "systemMetadata": { @@ -793,19 +834,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "container", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)", - "type": "TRANSFORMED" - } - ] + "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810" } }, "systemMetadata": { @@ -884,7 +916,7 @@ "datasourceType": "postgres", "datasourceUid": "test-postgres", "description": "Recent metrics from all sources", - "queryCount": "1" + "targetsCount": "1" }, "title": "Recent Metrics", "description": "Recent metrics from all sources", @@ -1130,93 +1162,14 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.DataPlatformInstance": { - "platform": "urn:li:dataPlatform:grafana", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)" - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "type": "postgres", - "uid": "test-postgres", - "full_path": "postgres.test-postgres.5" - }, - "name": "test-postgres (Total Metrics Count)", - "description": "", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "postgres.test-postgres.5", - "platform": "urn:li:dataPlatform:grafana", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "count", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "number", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "time", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} - } - }, - "nativeDataType": "timestamp", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - { - "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [ - { - "tag": "urn:li:tag:test-tag" - }, - { - "tag": "urn:li:tag:integration-test" - } - ] - } - } - ] + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:grafana", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)" } }, "systemMetadata": { @@ -1229,10 +1182,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "datasetProperties", "aspect": { "json": { - "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810" + "customProperties": { + "type": "postgres", + "uid": "test-postgres", + "full_path": "postgres.test-postgres.5" + }, + "name": "test-postgres (Total Metrics Count)", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -1245,17 +1205,66 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "status", "aspect": { "json": { - "upstreams": [ + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "postgres.test-postgres.5", + "platform": "urn:li:dataPlatform:grafana", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" + "fieldPath": "count", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)", - "type": "TRANSFORMED" + "nativeDataType": "number", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "time", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.TimeType": {} + } + }, + "nativeDataType": "timestamp", + "recursive": false, + "isPartOfKey": false } ] } @@ -1266,6 +1275,45 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [ + { + "tag": "urn:li:tag:test-tag" + }, + { + "tag": "urn:li:tag:integration-test" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810" + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", @@ -1336,7 +1384,7 @@ "datasourceType": "postgres", "datasourceUid": "test-postgres", "description": "Total number of metrics collected", - "queryCount": "1" + "targetsCount": "1" }, "title": "Total Metrics Count", "description": "Total number of metrics collected", @@ -1492,91 +1540,109 @@ } }, { - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.DataPlatformInstance": { - "platform": "urn:li:dataPlatform:grafana", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)" - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "type": "prometheus", - "uid": "test-prometheus", - "full_path": "prometheus.test-prometheus.6" - }, - "name": "test-prometheus (System Metrics)", - "description": "", - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:grafana", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:grafana,local-grafana)" + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "datasetProperties", + "aspect": { + "json": { + "customProperties": { + "type": "prometheus", + "uid": "test-prometheus", + "full_path": "prometheus.test-prometheus.6" + }, + "name": "test-prometheus (System Metrics)", + "description": "", + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "schemaMetadata", + "aspect": { + "json": { + "schemaName": "prometheus.test-prometheus.6", + "platform": "urn:li:dataPlatform:grafana", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "prometheus.test-prometheus.6", - "platform": "urn:li:dataPlatform:grafana", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "CPU Usage", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "prometheus_metric", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "Memory Usage", - "nullable": false, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "prometheus_metric", - "recursive": false, - "isPartOfKey": false - } - ] - } + "fieldPath": "CPU Usage", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "prometheus_metric", + "recursive": false, + "isPartOfKey": false }, { - "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [ - { - "tag": "urn:li:tag:test-tag" - }, - { - "tag": "urn:li:tag:integration-test" - } - ] - } + "fieldPath": "Memory Usage", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.NumberType": {} + } + }, + "nativeDataType": "prometheus_metric", + "recursive": false, + "isPartOfKey": false } ] } @@ -1591,10 +1657,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "globalTags", "aspect": { "json": { - "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810" + "tags": [ + { + "tag": "urn:li:tag:test-tag" + }, + { + "tag": "urn:li:tag:integration-test" + } + ] } }, "systemMetadata": { @@ -1607,19 +1680,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "container", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:prometheus,local.test-prometheus,PROD)", - "type": "TRANSFORMED" - } - ] + "container": "urn:li:container:ae0ac23df7f392b003891eb008eea810" } }, "systemMetadata": { @@ -1698,7 +1762,7 @@ "datasourceType": "prometheus", "datasourceUid": "test-prometheus", "description": "Prometheus system metrics", - "queryCount": "2" + "targetsCount": "2" }, "title": "System Metrics", "description": "Prometheus system metrics", @@ -1838,6 +1902,106 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.2,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.4,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.postgres.test-postgres.5,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,local.grafana.test-postgres,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:grafana,local-grafana.prometheus.test-prometheus.6,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:prometheus,local.test-prometheus,PROD)", + "type": "TRANSFORMED" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1720785600000, + "runId": "grafana-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(grafana,local-grafana.default)",