feat: publish new GenAI Eval Service SDK for Google Observability public tutorial (#2383)

matty3 · web-flow · commit e8baf6577253 · 2025-10-06T11:06:51.000-05:00
diff --git a/gemini/evaluation/evaluating_observability_datasets.ipynb b/gemini/evaluation/evaluating_observability_datasets.ipynb
@@ -0,0 +1,285 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ur8xi4C7S06n"
+      },
+      "outputs": [],
+      "source": [
+        "# Copyright 2025 Google LLC\n",
+        "#\n",
+        "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "#     https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "JAPoU8Sm5E6e"
+      },
+      "source": [
+        "# Using the Gen AI Evaluation SDK for Google Observability Gen AI multimodal datasets\n",
+        "\n",
+        "<table align=\"left\">\n",
+        "  <td style=\"text-align: center\">\n",
+        "    <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_observability_datasets.ipynb\">\n",
+        "      <img width=\"32px\" src=\"https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg\" alt=\"Google Colaboratory logo\"><br> Open in Colab\n",
+        "    </a>\n",
+        "  </td>\n",
+        "  <td style=\"text-align: center\">\n",
+        "    <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fevaluation%2Fevaluating_observability_datasets.ipynb\">\n",
+        "      <img width=\"32px\" src=\"https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN\" alt=\"Google Cloud Colab Enterprise logo\"><br> Open in Colab Enterprise\n",
+        "    </a>\n",
+        "  </td>\n",
+        "  <td style=\"text-align: center\">\n",
+        "    <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/evaluation/evaluating_observability_datasets.ipynb\">\n",
+        "      <img src=\"https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg\" alt=\"Vertex AI logo\"><br> Open in Vertex AI Workbench\n",
+        "    </a>\n",
+        "  </td>\n",
+        "  <td style=\"text-align: center\">\n",
+        "    <a href=\"https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_observability_datasets.ipynb\">\n",
+        "      <img width=\"32px\" src=\"https://storage.googleapis.com/github-repo/generative-ai/logos/GitHub_Invertocat_Dark.svg\" alt=\"GitHub logo\"><br> View on GitHub\n",
+        "    </a>\n",
+        "  </td>\n",
+        "</table>\n",
+        "\n",
+        "<div style=\"clear: both;\"></div>\n",
+        "\n",
+        "<p>\n",
+        "<b>Share to:</b>\n",
+        "\n",
+        "<a href=\"https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_observability_datasets.ipynb\" target=\"_blank\">\n",
+        "  <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg\" alt=\"LinkedIn logo\">\n",
+        "</a>\n",
+        "\n",
+        "<a href=\"https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_observability_datasets.ipynb\" target=\"_blank\">\n",
+        "  <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg\" alt=\"Bluesky logo\">\n",
+        "</a>\n",
+        "\n",
+        "<a href=\"https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_observability_datasets.ipynb\" target=\"_blank\">\n",
+        "  <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg\" alt=\"X logo\">\n",
+        "</a>\n",
+        "\n",
+        "<a href=\"https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_observability_datasets.ipynb\" target=\"_blank\">\n",
+        "  <img width=\"20px\" src=\"https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png\" alt=\"Reddit logo\">\n",
+        "</a>\n",
+        "\n",
+        "<a href=\"https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/evaluation/evaluating_observability_datasets.ipynb\" target=\"_blank\">\n",
+        "  <img width=\"20px\" src=\"https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg\" alt=\"Facebook logo\">\n",
+        "</a>\n",
+        "</p>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "84f0f73a0f76"
+      },
+      "source": [
+        "| Author |\n",
+        "| --- |\n",
+        "| [Matthew Yun](https://github.com/matty3) |"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "tvgnzT1CKxrO"
+      },
+      "source": [
+        "## Overview\n",
+        "\n",
+        "This notebook demonstrates how to use the [Vertex AI SDK Gen AI evaluation service](https://github.com/googleapis/python-aiplatform) to evaluate your Gen AI multimodal content stored in [Google Observability](https://cloud.google.com/stackdriver/docs).\n",
+        "\n",
+        "The Vertex AI SDK allows users to run evaluations on their Gen AI models' prompts and responses. This now includes the ability to run evaluations against Gen AI data stored in Google Cloud Storage (GCS) within Observability, following [OpenTelemetry semantic conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/).\n",
+        "\n",
+        "Prompt, response, and system instruction data in Google Observability are stored in separate GCS references. This notebook shows an example of how to read that data from GCS and run an evaluation using the Vertex AI SDK."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "61RBz8LLbxCR"
+      },
+      "source": [
+        "## Get started"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "No17Cw5hgx12"
+      },
+      "source": [
+        "### Install Vertex AI SDK for Gen AI Evaluation Service\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "tFy3H3aPgx12"
+      },
+      "outputs": [],
+      "source": [
+        "# Install or upgrade the Vertex AI SDK with its `evaluation` extra (version 1.111.0 or newer).\n",
+        "%pip install --upgrade \"google-cloud-aiplatform[evaluation]>=1.111.0\" --force-reinstall --quiet --no-warn-conflicts"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dmWOrTJ3gx13"
+      },
+      "source": [
+        "### Authenticate your notebook environment (Colab only)\n",
+        "\n",
+        "If you're running this notebook on Google Colab, run the cell below to authenticate your environment."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "NyKGtVQjgx13"
+      },
+      "outputs": [],
+      "source": [
+        "import sys\n",
+        "\n",
+        "# Only authenticate when the google.colab module is loaded; elsewhere this cell does nothing.\n",
+        "if \"google.colab\" in sys.modules:\n",
+        "    from google.colab import auth\n",
+        "\n",
+        "    auth.authenticate_user()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "DF4l8DTdWgPY"
+      },
+      "source": [
+        "### Set Google Cloud project information\n",
+        "\n",
+        "To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n",
+        "\n",
+        "Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Nqwi-5ufWp_B"
+      },
+      "outputs": [],
+      "source": [
+        "# Resolve the Google Cloud project and region, then create the Vertex AI client.\n",
+        "import os\n",
+        "\n",
+        "from vertexai import Client, types\n",
+        "\n",
+        "# fmt: off\n",
+        "PROJECT_ID = \"\"  # @param {type: \"string\", placeholder: \"[your-project-id]\", isTemplate: true}\n",
+        "LOCATION = \"us-central1\"  # @param {type: \"string\", placeholder: \"us-central1\", isTemplate: true}\n",
+        "# fmt: on\n",
+        "\n",
+        "# Use the environment variable if the form field is empty or still holds the placeholder.\n",
+        "if not PROJECT_ID or PROJECT_ID == \"[your-project-id]\":\n",
+        "    PROJECT_ID = os.environ.get(\"GOOGLE_CLOUD_PROJECT\", \"\")\n",
+        "\n",
+        "# Fail fast with a clear message instead of passing the literal string \"None\" as the project ID.\n",
+        "if not PROJECT_ID:\n",
+        "    raise ValueError(\n",
+        "        \"PROJECT_ID is not set: fill in the form field above or set the \"\n",
+        "        \"GOOGLE_CLOUD_PROJECT environment variable.\"\n",
+        "    )\n",
+        "\n",
+        "# GOOGLE_CLOUD_REGION, when set, takes precedence over the form value.\n",
+        "LOCATION = os.environ.get(\"GOOGLE_CLOUD_REGION\", LOCATION)\n",
+        "\n",
+        "client = Client(project=PROJECT_ID, location=LOCATION)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "EdvJRUWRNGHE"
+      },
+      "source": [
+        "## Evaluation Dataset"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "e43229f3ad4f"
+      },
+      "source": [
+        "### Load in Google Observability Gen AI dataset\n",
+        "\n",
+        "Read in the data stored in Google Cloud Storage and prepare it for evaluation. Set the three source fields below to the GCS references of your prompt, response, and system instruction data."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cf93d5f0ce00"
+      },
+      "outputs": [],
+      "source": [
+        "# GCS references to the stored prompt, response, and system instruction data (see Overview).\n",
+        "# fmt: off\n",
+        "INPUT_SOURCE = \"\"  # @param {type: \"string\", placeholder: \"[your-input-source]\", isTemplate: true}\n",
+        "OUTPUT_SOURCE = \"\"  # @param {type: \"string\", placeholder: \"[your-output-source]\", isTemplate: true}\n",
+        "SYSTEM_INSTRUCTION_SOURCE = \"\"  # @param {type: \"string\", placeholder: \"[your-system-instruction-source]\", isTemplate: true}\n",
+        "# fmt: on\n",
+        "\n",
+        "# Bundle the three sources into one eval case; the client created earlier is passed as `api_client`.\n",
+        "eval_case = types.ObservabilityEvalCase(\n",
+        "    input_src=INPUT_SOURCE,\n",
+        "    output_src=OUTPUT_SOURCE,\n",
+        "    system_instruction_src=SYSTEM_INSTRUCTION_SOURCE,\n",
+        "    api_client=client,\n",
+        ")\n",
+        "# Build the evaluation dataset from a list containing this single eval case.\n",
+        "eval_dataset = types.EvaluationDataset.load_from_observability_eval_cases([eval_case])\n",
+        "\n",
+        "# Display the loaded dataset.\n",
+        "eval_dataset.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2a4e033321ad"
+      },
+      "source": [
+        "### Run Evaluation\n",
+        "\n",
+        "Evaluate the responses. By default, the `GENERAL_QUALITY` adaptive rubric-based metric is used."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "3SWzCOUt21t9"
+      },
+      "outputs": [],
+      "source": [
+        "# No metrics are specified here, so the evaluation runs with the SDK's default metric.\n",
+        "eval_result = client.evals.evaluate(dataset=eval_dataset)\n",
+        "\n",
+        "# Display the evaluation results.\n",
+        "eval_result.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "name": "evaluating_observability_datasets.ipynb",
+      "toc_visible": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}