Skip to content

Commit 4ffef9f

Browse files
committed
Functionality to adapt to OLSConfig for multi-provider support.
#Restructuring and adding new logs. #Fixing linting issues.
1 parent 8faa93c commit 4ffef9f

File tree

8 files changed

+388
-43
lines changed

8 files changed

+388
-43
lines changed

tests/e2e/conftest.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818

1919
from scripts.upload_artifact_s3 import upload_artifact_s3
2020
from tests.e2e.utils import client as client_utils
21-
from tests.e2e.utils import ols_installer
21+
from tests.e2e.utils import cluster, ols_installer
22+
from tests.e2e.utils.adapt_ols_config import adapt_ols_config
2223
from tests.e2e.utils.wait_for_ols import wait_for_ols
2324
from tests.scripts.must_gather import must_gather
2425

@@ -45,11 +46,38 @@ def pytest_sessionstart():
4546
# OLS_URL env only needs to be set when running against a local ols instance,
4647
# when ols is run against a cluster the url is retrieved from the cluster.
4748
ols_url = os.getenv("OLS_URL", "")
48-
4949
if "localhost" not in ols_url:
5050
on_cluster = True
5151
try:
52-
ols_url, token, metrics_token = ols_installer.install_ols()
52+
result = cluster.run_oc(
53+
[
54+
"get",
55+
"clusterserviceversion",
56+
"-n",
57+
"openshift-lightspeed",
58+
"-o",
59+
"json",
60+
]
61+
)
62+
csv_data = json.loads(result.stdout)
63+
print(csv_data)
64+
65+
if not csv_data["items"]:
66+
print("OLS Operator is not installed yet.")
67+
ols_url, token, metrics_token = ols_installer.install_ols()
68+
else:
69+
print("OLS Operator is already installed. Skipping install.")
70+
provider = os.getenv("PROVIDER", "openai")
71+
creds = os.getenv("PROVIDER_KEY_PATH", "empty")
72+
# create the llm api key secret ols will mount
73+
provider_list = provider.split()
74+
creds_list = creds.split()
75+
for i, prov in enumerate(provider_list):
76+
ols_installer.create_secrets(
77+
prov, creds_list[i], len(provider_list)
78+
)
79+
ols_url, token, metrics_token = adapt_ols_config()
80+
5381
except Exception as e:
5482
print(f"Error setting up OLS on cluster: {e}")
5583
must_gather()
Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
"""Functions to adapt OLS configuration for different providers.
2+
3+
Handles multi-provider test scenarios dynamically.
4+
"""
5+
6+
import os
7+
8+
import yaml
9+
10+
from ols.constants import DEFAULT_CONFIGURATION_FILE
11+
from tests.e2e.utils import cluster as cluster_utils
12+
from tests.e2e.utils.constants import OLS_COLLECTOR_DISABLING_FILE
13+
from tests.e2e.utils.retry import retry_until_timeout_or_success
14+
from tests.e2e.utils.wait_for_ols import wait_for_ols
15+
16+
17+
def apply_olsconfig(provider_list: list[str]) -> None:
    """Apply the OLSConfig custom resource that matches the given providers.

    Exactly one provider selects the per-provider manifest
    ``olsconfig.crd.<provider>.yaml``. When the ``OLS_CONFIG_SUFFIX``
    environment variable is set to anything other than ``"default"``, the
    suffix is appended as ``_<suffix>`` before the extension. Any other
    number of providers selects the shared evaluation manifest.

    Args:
        provider_list: List of provider names to configure.
    """
    if len(provider_list) != 1:
        print("Applying evaluation olsconfig CR for multiple providers")
        evaluation_manifest = (
            "tests/config/operator_install/olsconfig.crd.evaluation.yaml"
        )
        cluster_utils.run_oc(
            ["apply", "-f", evaluation_manifest],
            ignore_existing_resource=True,
        )
    else:
        (provider,) = provider_list
        suffix = os.getenv("OLS_CONFIG_SUFFIX", "default")
        # "default" means the plain per-provider manifest, without a suffix.
        name_tail = "" if suffix == "default" else f"_{suffix}"
        crd_yml_name = f"olsconfig.crd.{provider}{name_tail}"
        print(f"Applying olsconfig CR from {crd_yml_name}.yaml")
        manifest_path = f"tests/config/operator_install/{crd_yml_name}.yaml"
        cluster_utils.run_oc(
            ["apply", "-f", manifest_path],
            ignore_existing_resource=False,
        )
    print("OLSConfig CR applied successfully")
45+
46+
47+
def update_ols_configmap() -> None:
    """Patch the ``olsconfig`` configmap with settings used by the e2e tests.

    Fetches the configmap through ``oc``, sets ``lib_log_level`` to ``INFO``,
    replaces the ``user_data_collector_config`` section and applies the
    result back. This is best-effort: any exception is caught and reported
    as a printed warning instead of being raised.
    """
    try:
        print("Updating OLS configmap for e2e tests...")

        fetched = cluster_utils.run_oc(["get", "cm/olsconfig", "-o", "yaml"])
        configmap = yaml.safe_load(fetched.stdout)
        # The OLS configuration is a YAML document stored as a string value
        # inside the configmap, so it needs a second parse.
        olsconfig = yaml.safe_load(configmap["data"][DEFAULT_CONFIGURATION_FILE])

        # Set INFO level to avoid redacted logs
        logging_config = olsconfig.setdefault("ols_config", {}).setdefault(
            "logging_config", {}
        )
        logging_config["lib_log_level"] = "INFO"

        # User data collector settings for the e2e tests; this replaces any
        # existing section wholesale.
        collector_config = {
            "data_storage": "/app-root/ols-user-data",
            "log_level": "debug",
            "collection_interval": 10,
            "run_without_initial_wait": True,
            "ingress_env": "stage",
            "cp_offline_token": os.getenv("CP_OFFLINE_TOKEN", ""),
        }
        olsconfig["user_data_collector_config"] = collector_config

        # Re-embed the edited configuration and feed the whole configmap to
        # `oc apply -f -`.
        configmap["data"][DEFAULT_CONFIGURATION_FILE] = yaml.dump(olsconfig)
        cluster_utils.run_oc(["apply", "-f", "-"], command=yaml.dump(configmap))
        print("OLS configmap updated successfully")
    except Exception as err:
        print(f"Warning: Could not update OLS configmap: {err}")
88+
89+
90+
def setup_service_accounts(namespace: str) -> None:
    """Create the test service accounts and grant them their access roles.

    Both accounts are created first (an already existing account is
    tolerated), then each one is granted its role.

    Args:
        namespace: The Kubernetes namespace to create service accounts in.
    """
    # Service account name -> role granted to it.
    role_by_account = {
        "test-user": "lightspeed-operator-query-access",
        "metrics-test-user": "lightspeed-operator-ols-metrics-reader",
    }

    for account in role_by_account:
        print(f"Ensuring '{account}' service account exists...")
        cluster_utils.run_oc(
            ["create", "sa", account, "-n", namespace],
            ignore_existing_resource=True,
        )

    print("Granting access roles to service accounts...")
    for account, role in role_by_account.items():
        cluster_utils.grant_sa_user_access(account, role)
113+
114+
115+
def setup_rbac(namespace: str) -> None:
    """Create the ``pod-reader`` role and bind it to the ``test-user`` account.

    Both resources are created with ``oc create``; resources that already
    exist are tolerated.

    Args:
        namespace: The Kubernetes namespace for RBAC configuration.
    """
    print("Ensuring 'pod-reader' role and rolebinding exist...")

    create_role = [
        "create",
        "role",
        "pod-reader",
        "--verb=get,list",
        "--resource=pods",
        "--namespace",
        namespace,
    ]
    create_binding = [
        "create",
        "rolebinding",
        "test-user-pod-reader",
        "--role=pod-reader",
        f"--serviceaccount={namespace}:test-user",
        "--namespace",
        namespace,
    ]

    # The role must exist before the binding that references it.
    for oc_args in (create_role, create_binding):
        cluster_utils.run_oc(oc_args, ignore_existing_resource=True)

    print("RBAC setup verified.")
148+
149+
150+
def wait_for_deployment() -> None:
    """Wait for the OLS deployment to appear and for its pod to be running.

    Polls ``oc get deployment lightspeed-app-server`` until the deployment
    name is reported, then delegates to ``cluster_utils.wait_for_running_pod``.
    """
    expected_name = "deployment.apps/lightspeed-app-server"

    def deployment_detected() -> bool:
        """Report whether `oc` currently lists the app-server deployment."""
        lookup = cluster_utils.run_oc(
            [
                "get",
                "deployment",
                "lightspeed-app-server",
                "--ignore-not-found",
                "-o",
                "name",
            ]
        )
        # With --ignore-not-found a missing deployment yields empty output.
        return lookup.stdout.strip() == expected_name

    print("Waiting for OLS controller to apply updated configuration...")
    # NOTE(review): 30 and 6 are presumably attempts and interval (seconds);
    # confirm against retry_until_timeout_or_success.
    retry_until_timeout_or_success(
        30,
        6,
        deployment_detected,
        "Waiting for lightspeed-app-server deployment to be detected",
    )

    print("Waiting for pods to be ready after configuration update...")
    cluster_utils.wait_for_running_pod()
175+
176+
177+
def setup_route() -> str:
    """Recreate the ``ols`` route and return the URL it exposes.

    An existing route is deleted first; a failed delete is reported and
    ignored. The route is then created from the route manifest and its host
    is read back from the cluster.

    Returns:
        The HTTPS URL for accessing the OLS service.
    """
    delete_args = ["delete", "route", "ols"]
    try:
        cluster_utils.run_oc(delete_args, ignore_existing_resource=False)
    except Exception:
        print("No existing route to delete. Continuing...")

    print("Creating route for OLS access")
    create_args = ["create", "-f", "tests/config/operator_install/route.yaml"]
    cluster_utils.run_oc(create_args, ignore_existing_resource=False)

    host_lookup = cluster_utils.run_oc(
        ["get", "route", "ols", "-o", "jsonpath='{.spec.host}'"]
    )
    # The jsonpath template carries literal single quotes, so they are
    # stripped from the reported host.
    host = host_lookup.stdout.strip("'")
    return "https://" + host
199+
200+
201+
def _scale_deployment(deployment: str, replicas: int) -> None:
    """Scale ``deployment/<deployment>`` to ``replicas`` via ``oc scale``."""
    cluster_utils.run_oc(
        ["scale", f"deployment/{deployment}", "--replicas", str(replicas)]
    )


def adapt_ols_config() -> tuple[str, str, str]:
    """Adapt an already installed OLS to the providers selected for the tests.

    Providers are read from the whitespace-separated ``PROVIDER`` environment
    variable (``openai`` when unset or empty). The function applies the
    matching OLSConfig CR, lets the operator reconcile it, patches the OLS
    configmap for e2e use, and ensures service accounts, RBAC and the OLS
    route exist for test execution.

    Returns:
        tuple: (ols_url, token, metrics_token)

    Raises:
        RuntimeError: If applying the OLSConfig CR fails, or if the service
            accounts or their access roles cannot be ensured.
    """
    operator = "lightspeed-operator-controller-manager"
    app_server = "lightspeed-app-server"
    namespace = "openshift-lightspeed"

    print("Adapting OLS configuration for provider switching")
    provider_list = os.getenv("PROVIDER", "openai").split() or ["openai"]
    print(f"Configuring for providers: {provider_list}")

    try:
        apply_olsconfig(provider_list)
    except Exception as e:
        raise RuntimeError(f"Error applying OLSConfig CR: {e}") from e

    # Scale controller manager back up to reconcile changes to the olsconfig
    _scale_deployment(operator, 1)
    retry_until_timeout_or_success(
        30,
        6,
        lambda: cluster_utils.get_pod_by_prefix(prefix=operator),
    )

    wait_for_deployment()

    # Scale down the operator controller manager to avoid it interfering with
    # the tests, and stop the app server while its configmap is edited.
    _scale_deployment(operator, 0)
    _scale_deployment(app_server, 0)

    # Update OLS configmap with additional e2e configurations. A failure here
    # is only reported; the app server is brought back up regardless.
    try:
        update_ols_configmap()
    except Exception as e:
        print(f"Warning: Could not update OLS configmap: {e}")
    _scale_deployment(app_server, 1)

    # Ensure service accounts exist; tokens are fetched for them further down.
    try:
        setup_service_accounts(namespace)
    except Exception as e:
        raise RuntimeError(
            f"Error ensuring service accounts or access roles: {e}"
        ) from e

    # Ensure pod-reader role and binding exist; a failure is only reported.
    try:
        setup_rbac(namespace)
    except Exception as e:
        print(f"Warning: Could not ensure pod-reader role/binding: {e}")

    # Wait for deployment and pods
    wait_for_deployment()

    # Disable collector script by default to avoid running during all tests
    pod_name = cluster_utils.get_pod_by_prefix()[0]
    print(f"Disabling collector on pod {pod_name}")
    cluster_utils.create_file(pod_name, OLS_COLLECTOR_DISABLING_FILE, "")

    # Fetch tokens for service accounts
    print("Fetching tokens for service accounts...")
    token = cluster_utils.get_token_for("test-user")
    metrics_token = cluster_utils.get_token_for("metrics-test-user")

    # Set up route and get URL
    ols_url = setup_route()
    wait_for_ols(ols_url)

    print("OLS configuration and access setup completed successfully.")
    return ols_url, token, metrics_token
304+
305+
306+
if __name__ == "__main__":
    # Script entry point: run the adaptation directly; the returned
    # (ols_url, token, metrics_token) tuple is discarded.
    adapt_ols_config()

0 commit comments

Comments
 (0)