|
| 1 | +"""Functions to adapt OLS configuration for different providers. |
| 2 | +
|
| 3 | +Handles multi-provider test scenarios dynamically. |
| 4 | +""" |
| 5 | + |
| 6 | +import os |
| 7 | + |
| 8 | +import yaml |
| 9 | + |
| 10 | +from ols.constants import DEFAULT_CONFIGURATION_FILE |
| 11 | +from tests.e2e.utils import cluster as cluster_utils |
| 12 | +from tests.e2e.utils.constants import OLS_COLLECTOR_DISABLING_FILE |
| 13 | +from tests.e2e.utils.retry import retry_until_timeout_or_success |
| 14 | +from tests.e2e.utils.wait_for_ols import wait_for_ols |
| 15 | + |
| 16 | + |
def apply_olsconfig(provider_list: list[str]) -> None:
    """Apply the OLSConfig CR that matches the requested providers.

    With exactly one provider, the manifest
    ``olsconfig.crd.<provider>[_<suffix>].yaml`` is applied, where the
    optional suffix is read from the ``OLS_CONFIG_SUFFIX`` environment
    variable (the value ``"default"`` means no suffix). For any other
    number of providers, the evaluation manifest is applied instead.

    Args:
        provider_list: List of provider names to configure.
    """
    if len(provider_list) != 1:
        print("Applying evaluation olsconfig CR for multiple providers")
        manifest = "tests/config/operator_install/olsconfig.crd.evaluation.yaml"
        ignore_existing = True
    else:
        manifest_name = f"olsconfig.crd.{provider_list[0]}"
        suffix = os.getenv("OLS_CONFIG_SUFFIX", "default")
        if suffix != "default":
            manifest_name = f"{manifest_name}_{suffix}"
        print(f"Applying olsconfig CR from {manifest_name}.yaml")
        manifest = f"tests/config/operator_install/{manifest_name}.yaml"
        ignore_existing = False

    # Single apply call; only the manifest path and the flag differ per branch.
    cluster_utils.run_oc(
        ["apply", "-f", manifest],
        ignore_existing_resource=ignore_existing,
    )
    print("OLSConfig CR applied successfully")
| 45 | + |
| 46 | + |
def update_ols_configmap() -> None:
    """Patch the ``olsconfig`` configmap with e2e-specific settings.

    Reads ``cm/olsconfig``, sets the library log level to ``INFO``, injects
    a ``user_data_collector_config`` section, and applies the result back
    through ``oc apply -f -``. This is best-effort: any failure is reported
    as a printed warning and is not raised to the caller.
    """
    try:
        print("Updating OLS configmap for e2e tests...")
        # Fetch the live configmap and parse the OLS config embedded in it
        fetched = cluster_utils.run_oc(["get", "cm/olsconfig", "-o", "yaml"])
        cm = yaml.safe_load(fetched.stdout)
        ols_cfg = yaml.safe_load(cm["data"][DEFAULT_CONFIGURATION_FILE])

        # Make sure the nested logging section exists before writing to it
        if "ols_config" not in ols_cfg:
            ols_cfg["ols_config"] = {}
        ols_section = ols_cfg["ols_config"]
        if "logging_config" not in ols_section:
            ols_section["logging_config"] = {}

        # INFO level avoids redacted logs
        ols_section["logging_config"]["lib_log_level"] = "INFO"

        # User data collector settings used by the e2e tests
        collector_settings = {
            "data_storage": "/app-root/ols-user-data",
            "log_level": "debug",
            "collection_interval": 10,
            "run_without_initial_wait": True,
            "ingress_env": "stage",
            "cp_offline_token": os.getenv("CP_OFFLINE_TOKEN", ""),
        }
        ols_cfg["user_data_collector_config"] = collector_settings

        # Serialize the config back into the configmap and apply it via stdin
        cm["data"][DEFAULT_CONFIGURATION_FILE] = yaml.dump(ols_cfg)
        manifest = yaml.dump(cm)
        cluster_utils.run_oc(["apply", "-f", "-"], command=manifest)
        print("OLS configmap updated successfully")

    except Exception as err:
        print(f"Warning: Could not update OLS configmap: {err}")
| 88 | + |
| 89 | + |
def setup_service_accounts(namespace: str) -> None:
    """Create the test service accounts and grant them their access roles.

    Creates ``test-user`` and ``metrics-test-user`` (each create call is
    made with ``ignore_existing_resource=True``), then grants each account
    its role.

    Args:
        namespace: The Kubernetes namespace to create service accounts in.
    """
    for account in ("test-user", "metrics-test-user"):
        print(f"Ensuring '{account}' service account exists...")
        cluster_utils.run_oc(
            ["create", "sa", account, "-n", namespace],
            ignore_existing_resource=True,
        )

    print("Granting access roles to service accounts...")
    role_grants = (
        ("test-user", "lightspeed-operator-query-access"),
        ("metrics-test-user", "lightspeed-operator-ols-metrics-reader"),
    )
    for account, role in role_grants:
        cluster_utils.grant_sa_user_access(account, role)
| 113 | + |
| 114 | + |
def setup_rbac(namespace: str) -> None:
    """Create the ``pod-reader`` role and bind it to ``test-user``.

    Both ``oc create`` calls are made with ``ignore_existing_resource=True``.

    Args:
        namespace: The Kubernetes namespace for RBAC configuration.
    """
    print("Ensuring 'pod-reader' role and rolebinding exist...")

    # Role granting get/list on pods
    create_role_args = [
        "create",
        "role",
        "pod-reader",
        "--verb=get,list",
        "--resource=pods",
        "--namespace",
        namespace,
    ]
    cluster_utils.run_oc(create_role_args, ignore_existing_resource=True)

    # Bind that role to the test-user service account in the same namespace
    create_binding_args = [
        "create",
        "rolebinding",
        "test-user-pod-reader",
        "--role=pod-reader",
        f"--serviceaccount={namespace}:test-user",
        "--namespace",
        namespace,
    ]
    cluster_utils.run_oc(create_binding_args, ignore_existing_resource=True)

    print("RBAC setup verified.")
| 148 | + |
| 149 | + |
def wait_for_deployment() -> None:
    """Wait for the OLS deployment to appear and its pod to be running.

    Polls ``oc get deployment lightspeed-app-server`` until the deployment
    name is reported, then delegates to ``cluster_utils.wait_for_running_pod``.
    """

    def deployment_detected() -> bool:
        # --ignore-not-found is passed so a missing deployment is expected to
        # yield empty output rather than a failed command.
        result = cluster_utils.run_oc(
            [
                "get",
                "deployment",
                "lightspeed-app-server",
                "--ignore-not-found",
                "-o",
                "name",
            ]
        )
        return result.stdout.strip() == "deployment.apps/lightspeed-app-server"

    print("Waiting for OLS controller to apply updated configuration...")
    # NOTE(review): 30 and 6 are presumably the attempt count and the interval
    # between attempts - confirm against retry_until_timeout_or_success.
    retry_until_timeout_or_success(
        30,
        6,
        deployment_detected,
        "Waiting for lightspeed-app-server deployment to be detected",
    )

    print("Waiting for pods to be ready after configuration update...")
    cluster_utils.wait_for_running_pod()
| 175 | + |
| 176 | + |
def setup_route() -> str:
    """Recreate the ``ols`` route and return the URL built from its host.

    Any existing ``ols`` route is deleted first; a failure of that delete is
    treated as "nothing to delete" and ignored. The route is then created
    from ``tests/config/operator_install/route.yaml``.

    Returns:
        The HTTPS URL for accessing the OLS service.
    """
    try:
        cluster_utils.run_oc(["delete", "route", "ols"], ignore_existing_resource=False)
    except Exception:
        print("No existing route to delete. Continuing...")

    print("Creating route for OLS access")
    create_args = ["create", "-f", "tests/config/operator_install/route.yaml"]
    cluster_utils.run_oc(create_args, ignore_existing_resource=False)

    # The jsonpath expression carries literal single quotes, so they are
    # stripped from the reported host before building the URL.
    host_query = ["get", "route", "ols", "-o", "jsonpath='{.spec.host}'"]
    raw_host = cluster_utils.run_oc(host_query).stdout
    host = raw_host.strip("'")

    return f"https://{host}"
| 199 | + |
| 200 | + |
def adapt_ols_config() -> tuple[str, str, str]:
    """Reconfigure a running OLS install for the providers under test.

    Providers are read from the whitespace-separated ``PROVIDER`` environment
    variable (defaulting to ``openai``). The matching OLSConfig CR is applied,
    the operator is scaled up to reconcile it and then scaled down again, the
    configmap is patched with e2e settings, service accounts and RBAC are set
    up, and finally the route is created and OLS is awaited.

    Returns:
        tuple: (ols_url, token, metrics_token)

    Raises:
        RuntimeError: If applying the OLSConfig CR fails, or if creating the
            service accounts / granting their roles fails.
    """
    operator_deployment = "deployment/lightspeed-operator-controller-manager"
    app_deployment = "deployment/lightspeed-app-server"

    def scale(deployment: str, replicas: str) -> None:
        # Thin wrapper over `oc scale <deployment> --replicas <n>`
        cluster_utils.run_oc(["scale", deployment, "--replicas", replicas])

    def operator_pod_present():
        return cluster_utils.get_pod_by_prefix(
            prefix="lightspeed-operator-controller-manager"
        )

    print("Adapting OLS configuration for provider switching")
    provider_list = os.getenv("PROVIDER", "openai").split()
    if not provider_list:
        # PROVIDER was set but empty/blank: fall back to the default provider
        provider_list = ["openai"]
    print(f"Configuring for providers: {provider_list}")

    namespace = "openshift-lightspeed"
    try:
        apply_olsconfig(provider_list)
    except Exception as err:
        raise RuntimeError(f"Error applying OLSConfig CR: {err}") from err

    # Scale controller manager back up to reconcile changes to the olsconfig
    scale(operator_deployment, "1")
    retry_until_timeout_or_success(30, 6, operator_pod_present)

    wait_for_deployment()

    # Scale down the operator controller manager to avoid it interfering
    # with the tests, and stop the app server while its config is modified
    scale(operator_deployment, "0")
    scale(app_deployment, "0")

    # Update OLS configmap with additional e2e configurations
    try:
        update_ols_configmap()
    except Exception as err:
        print(f"Warning: Could not update OLS configmap: {err}")
    scale(app_deployment, "1")

    # Ensure service accounts exist
    try:
        setup_service_accounts(namespace)
    except Exception as err:
        raise RuntimeError(
            f"Error ensuring service accounts or access roles: {err}"
        ) from err

    # Ensure pod-reader role and binding exist (failure is only a warning)
    try:
        setup_rbac(namespace)
    except Exception as err:
        print(f"Warning: Could not ensure pod-reader role/binding: {err}")

    # Wait for deployment and pods
    wait_for_deployment()

    # Disable collector script by default to avoid running during all tests
    pods = cluster_utils.get_pod_by_prefix()
    pod_name = pods[0]
    print(f"Disabling collector on pod {pod_name}")
    cluster_utils.create_file(pod_name, OLS_COLLECTOR_DISABLING_FILE, "")

    # Fetch tokens for service accounts
    print("Fetching tokens for service accounts...")
    token = cluster_utils.get_token_for("test-user")
    metrics_token = cluster_utils.get_token_for("metrics-test-user")

    # Set up route and get URL
    ols_url = setup_route()
    wait_for_ols(ols_url)

    print("OLS configuration and access setup completed successfully.")
    return ols_url, token, metrics_token
| 304 | + |
| 305 | + |
# Allow running this module directly as a script to perform the full setup.
if __name__ == "__main__":
    adapt_ols_config()
0 commit comments