Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 29 additions & 3 deletions tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

from scripts.upload_artifact_s3 import upload_artifact_s3
from tests.e2e.utils import client as client_utils
from tests.e2e.utils import ols_installer
from tests.e2e.utils import cluster, ols_installer
from tests.e2e.utils.adapt_ols_config import adapt_ols_config
from tests.e2e.utils.wait_for_ols import wait_for_ols
from tests.scripts.must_gather import must_gather

Expand All @@ -45,11 +46,36 @@ def pytest_sessionstart():
# OLS_URL env only needs to be set when running against a local ols instance,
# when ols is run against a cluster the url is retrieved from the cluster.
ols_url = os.getenv("OLS_URL", "")

if "localhost" not in ols_url:
on_cluster = True
try:
ols_url, token, metrics_token = ols_installer.install_ols()
result = cluster.run_oc(
[
"get",
"clusterserviceversion",
"-n",
"openshift-lightspeed",
"-o",
"json",
]
)
csv_data = json.loads(result.stdout)
print(csv_data)

if not csv_data["items"]:
print("OLS Operator is not installed yet.")
ols_url, token, metrics_token = ols_installer.install_ols()
else:
print("OLS Operator is already installed. Skipping install.")
provider = os.getenv("PROVIDER", "openai")
creds = os.getenv("PROVIDER_KEY_PATH", "empty")
# create the llm api key secret ols will mount
provider_list = provider.split()
creds_list = creds.split()
for i, prov in enumerate(provider_list):
ols_installer.create_secrets(prov, creds_list[i], len(provider_list))
ols_url, token, metrics_token = adapt_ols_config()

except Exception as e:
print(f"Error setting up OLS on cluster: {e}")
must_gather()
Expand Down
309 changes: 309 additions & 0 deletions tests/e2e/utils/adapt_ols_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
"""Functions to adapt OLS configuration for different providers.

Handles multi-provider test scenarios dynamically.
"""

import os

import yaml

from ols.constants import DEFAULT_CONFIGURATION_FILE
from tests.e2e.utils import cluster as cluster_utils
from tests.e2e.utils.constants import OLS_COLLECTOR_DISABLING_FILE
from tests.e2e.utils.retry import retry_until_timeout_or_success
from tests.e2e.utils.wait_for_ols import wait_for_ols


def apply_olsconfig(provider_list: list[str]) -> None:
    """Apply the OLSConfig custom resource that matches the requested providers.

    When exactly one provider is given, the manifest
    ``olsconfig.crd.<provider>.yaml`` is applied; if the ``OLS_CONFIG_SUFFIX``
    environment variable is set to something other than ``default``, the
    manifest name gains an ``_<suffix>`` part. For any other list length the
    evaluation manifest is applied instead.

    Args:
        provider_list: List of provider names to configure.
    """
    # Anything other than a single provider uses the shared evaluation CR.
    if len(provider_list) != 1:
        print("Applying evaluation olsconfig CR for multiple providers")
        cluster_utils.run_oc(
            [
                "apply",
                "-f",
                "tests/config/operator_install/olsconfig.crd.evaluation.yaml",
            ],
            ignore_existing_resource=True,
        )
        print("OLSConfig CR applied successfully")
        return

    # Single provider: derive the manifest name from provider + optional suffix.
    manifest_stem = f"olsconfig.crd.{provider_list[0]}"
    suffix = os.getenv("OLS_CONFIG_SUFFIX", "default")
    if suffix != "default":
        manifest_stem = f"{manifest_stem}_{suffix}"

    print(f"Applying olsconfig CR from {manifest_stem}.yaml")
    cluster_utils.run_oc(
        ["apply", "-f", f"tests/config/operator_install/{manifest_stem}.yaml"],
        ignore_existing_resource=False,
    )
    print("OLSConfig CR applied successfully")


def update_ols_configmap() -> None:
    """Patch the ``olsconfig`` configmap with settings needed by the e2e tests.

    Reads the configmap via ``oc``, rewrites the embedded OLS configuration
    (stored under the ``DEFAULT_CONFIGURATION_FILE`` key) so that
    ``lib_log_level`` is ``INFO`` and a ``user_data_collector_config`` section
    is present, then applies the result back through ``oc apply -f -``.

    This is best-effort: any exception is reported as a warning on stdout and
    is not propagated to the caller.
    """
    try:
        print("Updating OLS configmap for e2e tests...")

        # Fetch the live configmap and unpack the nested OLS config document.
        fetched = cluster_utils.run_oc(["get", "cm/olsconfig", "-o", "yaml"])
        cm_doc = yaml.safe_load(fetched.stdout)
        ols_settings = yaml.safe_load(cm_doc["data"][DEFAULT_CONFIGURATION_FILE])

        # Make sure the nested sections exist before writing into them.
        if "ols_config" not in ols_settings:
            ols_settings["ols_config"] = {}
        ols_section = ols_settings["ols_config"]
        if "logging_config" not in ols_section:
            ols_section["logging_config"] = {}

        # Set INFO level to avoid redacted logs
        ols_section["logging_config"]["lib_log_level"] = "INFO"

        # Collector settings used by the e2e tests; the offline token is taken
        # from the environment and defaults to an empty string.
        collector_settings = {
            "data_storage": "/app-root/ols-user-data",
            "log_level": "debug",
            "collection_interval": 10,
            "run_without_initial_wait": True,
            "ingress_env": "stage",
            "cp_offline_token": os.getenv("CP_OFFLINE_TOKEN", ""),
        }
        ols_settings["user_data_collector_config"] = collector_settings

        # Re-embed the modified config and push the whole configmap via stdin.
        cm_doc["data"][DEFAULT_CONFIGURATION_FILE] = yaml.dump(ols_settings)
        serialized_cm = yaml.dump(cm_doc)
        cluster_utils.run_oc(["apply", "-f", "-"], command=serialized_cm)
        print("OLS configmap updated successfully")

    except Exception as e:
        print(f"Warning: Could not update OLS configmap: {e}")


def setup_service_accounts(namespace: str) -> None:
    """Create the test service accounts and grant them their access roles.

    Creates the ``test-user`` and ``metrics-test-user`` service accounts
    (``ignore_existing_resource=True`` is passed to ``run_oc`` for both
    creations) and then grants each one its role.

    Args:
        namespace: The Kubernetes namespace to create service accounts in.
    """
    # (progress message, service account name) in creation order.
    accounts = (
        ("Ensuring 'test-user' service account exists...", "test-user"),
        (
            "Ensuring 'metrics-test-user' service account exists...",
            "metrics-test-user",
        ),
    )
    for notice, account in accounts:
        print(notice)
        cluster_utils.run_oc(
            ["create", "sa", account, "-n", namespace],
            ignore_existing_resource=True,
        )

    print("Granting access roles to service accounts...")
    # (service account name, role name) pairs, granted in this order.
    role_grants = (
        ("test-user", "lightspeed-operator-query-access"),
        ("metrics-test-user", "lightspeed-operator-ols-metrics-reader"),
    )
    for account, role in role_grants:
        cluster_utils.grant_sa_user_access(account, role)


def setup_rbac(namespace: str) -> None:
    """Create the ``pod-reader`` role and bind it to the ``test-user`` account.

    Both ``oc create`` calls are issued with ``ignore_existing_resource=True``.

    Args:
        namespace: The Kubernetes namespace for RBAC configuration.
    """
    print("Ensuring 'pod-reader' role and rolebinding exist...")

    # Role allowing get/list on pods in the namespace.
    role_args = [
        "create",
        "role",
        "pod-reader",
        "--verb=get,list",
        "--resource=pods",
        "--namespace",
        namespace,
    ]
    # Binding of that role to the test-user service account.
    binding_args = [
        "create",
        "rolebinding",
        "test-user-pod-reader",
        "--role=pod-reader",
        f"--serviceaccount={namespace}:test-user",
        "--namespace",
        namespace,
    ]

    # The role must be created before the binding that references it.
    for oc_args in (role_args, binding_args):
        cluster_utils.run_oc(oc_args, ignore_existing_resource=True)

    print("RBAC setup verified.")


def wait_for_deployment() -> None:
    """Wait for the ``lightspeed-app-server`` deployment and its pod.

    Polls ``oc get deployment lightspeed-app-server`` through the retry helper
    until the deployment name is reported, then delegates to
    ``cluster_utils.wait_for_running_pod()``.
    """

    def deployment_detected() -> bool:
        """Return True once ``oc`` reports the app-server deployment by name."""
        # --ignore-not-found keeps oc from failing while the deployment is
        # still absent; stdout is then simply empty.
        lookup = cluster_utils.run_oc(
            [
                "get",
                "deployment",
                "lightspeed-app-server",
                "--ignore-not-found",
                "-o",
                "name",
            ]
        )
        return lookup.stdout.strip() == "deployment.apps/lightspeed-app-server"

    print("Waiting for OLS controller to apply updated configuration...")
    # NOTE(review): 30 and 6 are presumably attempts and interval - confirm
    # against the retry helper's signature.
    retry_until_timeout_or_success(
        30,
        6,
        deployment_detected,
        "Waiting for lightspeed-app-server deployment to be detected",
    )

    print("Waiting for pods to be ready after configuration update...")
    cluster_utils.wait_for_running_pod()


def setup_route() -> str:
    """Recreate the ``ols`` route and return the URL built from its host.

    Any existing ``ols`` route is deleted first (a failure to delete is
    reported and ignored), the route is created from
    ``tests/config/operator_install/route.yaml``, and its ``.spec.host`` is
    read back.

    Returns:
        The HTTPS URL for accessing the OLS service.
    """
    # Best-effort removal of a previous route; any failure is treated as
    # "nothing to delete".
    try:
        cluster_utils.run_oc(["delete", "route", "ols"], ignore_existing_resource=False)
    except Exception:
        print("No existing route to delete. Continuing...")

    print("Creating route for OLS access")
    route_manifest = "tests/config/operator_install/route.yaml"
    cluster_utils.run_oc(
        ["create", "-f", route_manifest],
        ignore_existing_resource=False,
    )

    # The jsonpath expression carries literal single quotes, which are removed
    # from both ends of the output below.
    host_lookup = cluster_utils.run_oc(
        ["get", "route", "ols", "-o", "jsonpath='{.spec.host}'"]
    )
    route_host = host_lookup.stdout.strip("'")

    return f"https://{route_host}"


def _scale_deployment(deployment: str, replicas: int) -> None:
    """Run ``oc scale deployment/<deployment> --replicas <replicas>``."""
    cluster_utils.run_oc(
        [
            "scale",
            f"deployment/{deployment}",
            "--replicas",
            str(replicas),
        ]
    )


def adapt_ols_config() -> tuple[str, str, str]:
    """Adapt OLS configuration for different providers dynamically.

    Intended for a cluster where the operator is already installed. The
    sequence is:

    1. Apply the OLSConfig CR for the providers named in ``PROVIDER``
       (whitespace separated, default ``openai``).
    2. Scale the operator controller manager to 1, wait for its pod and for
       the app-server deployment to show up.
    3. Scale the controller manager and the app server to 0, update the
       configmap for e2e tests, then scale the app server back to 1.
    4. Ensure service accounts and RBAC exist, wait for the app server,
       disable the collector on the pod, fetch tokens and recreate the route.

    Returns:
        tuple: (ols_url, token, metrics_token)

    Raises:
        RuntimeError: If applying the OLSConfig CR fails, or if creating the
            service accounts / granting their roles fails.
    """
    print("Adapting OLS configuration for provider switching")
    provider_env = os.getenv("PROVIDER", "openai")
    # An empty/whitespace-only PROVIDER falls back to the default provider.
    provider_list = provider_env.split() or ["openai"]
    print(f"Configuring for providers: {provider_list}")

    namespace = "openshift-lightspeed"
    controller_manager = "lightspeed-operator-controller-manager"
    app_server = "lightspeed-app-server"

    # Apply the correct OLSConfig CR
    try:
        apply_olsconfig(provider_list)
    except Exception as e:
        raise RuntimeError(f"Error applying OLSConfig CR: {e}") from e

    # Scale controller manager back up to reconcile changes to the olsconfig
    _scale_deployment(controller_manager, 1)
    retry_until_timeout_or_success(
        30,
        6,
        lambda: cluster_utils.get_pod_by_prefix(prefix=controller_manager),
    )

    wait_for_deployment()

    # scale down the operator controller manager to avoid it interfering with the tests
    _scale_deployment(controller_manager, 0)
    # The app server is stopped while the configmap is rewritten and started
    # again right after.
    _scale_deployment(app_server, 0)

    # Update OLS configmap with additional e2e configurations
    try:
        update_ols_configmap()
    except Exception as e:
        print(f"Warning: Could not update OLS configmap: {e}")
    # Runs whether or not the configmap update succeeded.
    _scale_deployment(app_server, 1)

    # Ensure service accounts exist
    try:
        setup_service_accounts(namespace)
    except Exception as e:
        raise RuntimeError(
            f"Error ensuring service accounts or access roles: {e}"
        ) from e

    # Ensure pod-reader role and binding exist (best-effort: only warns)
    try:
        setup_rbac(namespace)
    except Exception as e:
        print(f"Warning: Could not ensure pod-reader role/binding: {e}")

    # Wait for deployment and pods
    wait_for_deployment()

    # Disable collector script by default to avoid running during all tests
    pod_name = cluster_utils.get_pod_by_prefix()[0]
    print(f"Disabling collector on pod {pod_name}")
    cluster_utils.create_file(pod_name, OLS_COLLECTOR_DISABLING_FILE, "")

    # Fetch tokens for service accounts
    print("Fetching tokens for service accounts...")
    token = cluster_utils.get_token_for("test-user")
    metrics_token = cluster_utils.get_token_for("metrics-test-user")

    # Set up route and get URL
    ols_url = setup_route()
    wait_for_ols(ols_url)

    print("OLS configuration and access setup completed successfully.")
    return ols_url, token, metrics_token


# Script entry point: running this module directly performs the full
# adaptation; the returned (ols_url, token, metrics_token) tuple is discarded.
if __name__ == "__main__":
    adapt_ols_config()
Loading