Skip to content

Commit 10a5016

Browse files
committed
debug
1 parent 299b5ae commit 10a5016

File tree

1 file changed

+277
-11
lines changed

1 file changed

+277
-11
lines changed

.github/workflows/token-federation-test.yml

Lines changed: 277 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,16 @@ jobs:
7171
with open('src/databricks/sql/auth/token_federation.py', 'r') as f:
7272
content = f.read()
7373
74-
# Add verbose request debugging
74+
# Add token debugging
7575
modified = content.replace(
76+
'def _exchange_token(self, token, force_refresh=False):',
77+
'def _exchange_token(self, token, force_refresh=False):\n # Debug token info\n import jwt\n try:\n decoded = jwt.decode(token, options={"verify_signature": False})\n print(f"Token issuer: {decoded.get(\'iss\')}")\n print(f"Token subject: {decoded.get(\'sub\')}")\n print(f"Token audience: {decoded.get(\'aud\') if isinstance(decoded.get(\'aud\'), str) else decoded.get(\'aud\', [])[0] if decoded.get(\'aud\') else \'\'}")\n except Exception as e:\n print(f"Unable to decode token: {str(e)}")'
78+
)
79+
80+
# Add verbose request debugging
81+
modified = modified.replace(
7682
'try:\n # Make the token exchange request',
77-
'try:\n import urllib.parse\n # Debug full request\n print(f"Token endpoint: {self.token_endpoint}")\n print(f"Request parameters: {urllib.parse.urlencode(params)}")\n print(f"Request headers: {headers}")\n # Make the token exchange request'
83+
'try:\n import urllib.parse\n # Debug full request\n print(f"Connecting to Databricks at {self.host}")\n print(f"Token endpoint: {self.token_endpoint}")\n print(f"Request parameters: {urllib.parse.urlencode(params)}")\n print(f"Request headers: {headers}")\n # Make the token exchange request'
7884
)
7985
8086
# Add verbose response debugging
@@ -86,7 +92,7 @@ jobs:
8692
# Improve error handling
8793
modified = modified.replace(
8894
'except RequestException as e:',
89-
'except RequestException as e:\n if hasattr(e, "response") and e.response:\n print(f"Error response status: {e.response.status_code}")\n print(f"Error response headers: {dict(e.response.headers)}")\n print(f"Error response text: {e.response.text}")'
95+
'except RequestException as e:\n print(f"Failed to perform token exchange: {str(e)}")\n if hasattr(e, "response") and e.response:\n print(f"Error response status: {e.response.status_code}")\n print(f"Error response headers: {dict(e.response.headers)}")\n print(f"Error response text: {e.response.text}")'
9096
)
9197
9298
with open('src/databricks/sql/auth/token_federation.py', 'w') as f:
@@ -98,9 +104,73 @@ jobs:
98104
99105
chmod +x patch_for_debugging.py
100106
107+
- name: Install PyJWT for token debugging
108+
run: pip install pyjwt
109+
101110
- name: Apply debugging patches to token_federation.py
102111
run: python patch_for_debugging.py
103112

113+
- name: Create audience fix patch script
114+
run: |
115+
cat > patch_for_audience_fix.py << 'EOF'
116+
#!/usr/bin/env python3
117+
118+
def patch_code():
    """Insert audience-debugging code at the top of ``_exchange_token``.

    Reads ``src/databricks/sql/auth/token_federation.py`` and, after every
    ``def _exchange_token(self, token, force_refresh=False):`` line, inserts a
    snippet that decodes the token without verifying its signature, prints its
    ``aud`` claim, and warns when that claim differs from
    ``https://github.com/databricks``. The token itself is never modified.

    If the signature line is not found, the file is left untouched and a
    warning is written to stderr, so a change in the target file does not
    silently disable the debugging output.
    """
    import re
    import sys
    import textwrap

    target = 'src/databricks/sql/auth/token_federation.py'

    # Written at column 0 (after dedent) and re-indented below to match the
    # method body it is inserted into.
    snippet = textwrap.dedent('''\
        # Additional handling for different audience formats
        import jwt
        try:
            # Try both standard and alternative audience formats
            audience_tried = False

            def try_with_audience(token, audience):
                nonlocal audience_tried
                if audience_tried:
                    return None

                audience_tried = True
                decoded = jwt.decode(token, options={"verify_signature": False})
                aud = decoded.get("aud")

                # Check if aud is a list and convert to string if needed
                if isinstance(aud, list) and len(aud) > 0:
                    aud = aud[0]

                # Print audience for debugging
                print(f"Original token audience: {aud}")

                if aud != audience:
                    print(f"WARNING: Token audience '{aud}' doesn't match expected audience '{audience}'")
                    # We won't modify the token as that would invalidate the signature

                return None

            # We're just collecting debugging info, not modifying the token
            try_with_audience(token, "https://github.com/databricks")

        except Exception as e:
            print(f"Audience debug error: {str(e)}")
    ''')

    with open(target, 'r', encoding='utf-8') as f:
        content = f.read()

    # Capture the indentation of the ``def`` line so the snippet can be placed
    # one level deeper, whatever nesting the method lives at.
    signature = re.compile(
        r'^([ \t]*)def _exchange_token\(self, token, force_refresh=False\):',
        re.MULTILINE,
    )

    def inject(match):
        # Assumes the target file indents by four spaces per level (PEP 8).
        body_indent = match.group(1) + '    '
        indented = textwrap.indent(snippet, body_indent).rstrip('\n')
        return match.group(0) + '\n' + indented

    modified, hits = signature.subn(inject, content)
    if hits == 0:
        print(
            f"WARNING: _exchange_token signature not found in {target}; nothing patched",
            file=sys.stderr,
        )
        return

    with open(target, 'w', encoding='utf-8') as f:
        f.write(modified)
164+
165+
if __name__ == "__main__":
166+
patch_code()
167+
EOF
168+
169+
chmod +x patch_for_audience_fix.py
170+
171+
- name: Apply audience fix patches
172+
run: python patch_for_audience_fix.py
173+
104174
- name: Get GitHub OIDC token
105175
id: get-id-token
106176
uses: actions/github-script@v7
@@ -110,6 +180,106 @@ jobs:
110180
core.setSecret(token)
111181
core.setOutput('token', token)
112182
183+
- name: Decode and display OIDC token claims
184+
env:
185+
OIDC_TOKEN: ${{ steps.get-id-token.outputs.token }}
186+
run: |
187+
echo "Decoding GitHub OIDC token claims..."
188+
python -c '
import base64, json, os

# Read the token from the environment: the enclosing single quotes stop the
# shell from expanding $OIDC_TOKEN inside this script, so it cannot be
# interpolated into the source text.
token = os.environ.get("OIDC_TOKEN", "")

try:
    header, payload, signature = token.split(".")

    # JWT segments are unpadded base64url; restore only the missing padding.
    payload_padding = payload + "=" * (-len(payload) % 4)
    decoded_payload = base64.urlsafe_b64decode(payload_padding).decode("utf-8")
    claims = json.loads(decoded_payload)

    # Print important claims. Plain double-quoted keys are used here because
    # escaped quotes inside an f-string expression are a syntax error.
    print("\n=== GITHUB OIDC TOKEN CLAIMS ===")
    for label, key in (
        ("Issuer (iss)", "iss"),
        ("Subject (sub)", "sub"),
        ("Audience (aud)", "aud"),
        ("Repository", "repository"),
        ("Repository owner", "repository_owner"),
        ("Event name", "event_name"),
        ("Ref", "ref"),
        ("Workflow ref", "workflow_ref"),
    ):
        print(f"{label}: {claims.get(key)}")
    print("\n=== FULL CLAIMS ===")
    print(json.dumps(claims, indent=2))
    print("===========================\n")
except Exception as e:
    # Debugging aid only: report the problem instead of failing the step.
    print(f"Failed to decode token: {str(e)}")
'
220+
221+
- name: Debug token exchange with curl
222+
env:
223+
DATABRICKS_HOST: ${{ github.event_name == 'workflow_dispatch' && inputs.databricks_host || secrets.DATABRICKS_HOST_FOR_TF }}
224+
IDENTITY_FEDERATION_CLIENT_ID: ${{ github.event_name == 'workflow_dispatch' && inputs.identity_federation_client_id || secrets.IDENTITY_FEDERATION_CLIENT_ID_FOR_TF }}
225+
OIDC_TOKEN: ${{ steps.get-id-token.outputs.token }}
226+
run: |
227+
echo "Attempting direct token exchange with curl..."
228+
echo "Host: $DATABRICKS_HOST"
229+
echo "Client ID: $IDENTITY_FEDERATION_CLIENT_ID"
230+
231+
# Debug token claims before making the request
232+
echo "Token claims:"
233+
python3 -c "
import base64, json, os
try:
    # Read the token from the environment instead of splicing it into the
    # Python source through shell interpolation.
    token = os.environ.get('OIDC_TOKEN', '')
    parts = token.split('.')
    if len(parts) >= 2:
        # JWT segments are unpadded base64url; add only the padding that is missing.
        padding = '=' * (-len(parts[1]) % 4)
        decoded_bytes = base64.urlsafe_b64decode(parts[1] + padding)
        decoded_str = decoded_bytes.decode('utf-8')
        claims = json.loads(decoded_str)
        print(f\"Issuer: {claims.get('iss', 'unknown')}\")
        print(f\"Subject: {claims.get('sub', 'unknown')}\")
        print(f\"Audience: {claims.get('aud', 'unknown')}\")
    else:
        print('Invalid token format')
except Exception as e:
    # This step is diagnostic; a decode problem is reported, not fatal.
    print(f\"Failed to decode token: {str(e)}\")
"
248+
249+
# Create a properly URL-encoded request
250+
echo "Creating token exchange request..."
251+
curl_data=$(cat <<EOF
252+
client_id=$IDENTITY_FEDERATION_CLIENT_ID&\
253+
subject_token=$OIDC_TOKEN&\
254+
subject_token_type=urn:ietf:params:oauth:token-type:jwt&\
255+
grant_type=urn:ietf:params:oauth:grant-type:token-exchange&\
256+
scope=sql
257+
EOF
258+
)
259+
260+
# Print request details (except the token)
261+
echo "Request URL: https://$DATABRICKS_HOST/oidc/v1/token"
262+
echo "Request data: $(echo "$curl_data" | sed 's/subject_token=.*&/subject_token=REDACTED&/')"
263+
264+
# Make the request with detailed info
265+
echo "Sending request..."
266+
response=$(curl -v -s -X POST "https://$DATABRICKS_HOST/oidc/v1/token" \
267+
--data-raw "$curl_data" \
268+
-H "Content-Type: application/x-www-form-urlencoded" \
269+
-H "Accept: application/json" \
270+
2>&1)
271+
272+
# Extract and display results
273+
echo "Response:"
274+
echo "$response"
275+
276+
# Extract HTTP status if possible
277+
status_code=$(echo "$response" | grep -o "< HTTP/[0-9.]* [0-9]*" | grep -o "[0-9]*$" || echo "unknown")
278+
echo "HTTP Status Code: $status_code"
279+
280+
# Don't fail the workflow if curl fails
281+
exit 0
282+
113283
- name: Create test script
114284
run: |
115285
cat > test_github_token_federation.py << 'EOF'
@@ -127,7 +297,9 @@ jobs:
127297
import sys
128298
import json
129299
import base64
300+
import requests
130301
from databricks import sql
302+
import time
131303
132304
def decode_jwt(token):
133305
"""Decode and return the claims from a JWT token."""
@@ -137,6 +309,7 @@ jobs:
137309
raise ValueError("Invalid JWT format")
138310
139311
payload = parts[1]
312+
# Add padding if needed
140313
padding = '=' * (4 - len(payload) % 4)
141314
payload += padding
142315
@@ -146,6 +319,55 @@ jobs:
146319
print(f"Failed to decode token: {str(e)}")
147320
return None
148321
322+
def test_direct_token_exchange(host, token, client_id, audience=None):
    """Directly test token exchange with the Databricks API.

    POSTs a token-exchange form to ``https://{host}/oidc/v1/token`` and prints
    diagnostics about the request and the response.

    Args:
        host: Hostname (without scheme) used to build the token endpoint URL.
        token: The subject JWT to exchange.
        client_id: Value sent as the ``client_id`` form field.
        audience: Optional audience to compare with the token's ``aud`` claim.
            It is used only to print a warning; it is not sent in the request.

    Returns:
        The ``access_token`` field of a 200 response, or ``None`` if the
        request fails, returns another status, or cannot be parsed.
    """
    try:
        url = f"https://{host}/oidc/v1/token"
        data = {
            "client_id": client_id,
            "subject_token": token,
            "subject_token_type": "urn:ietf:params:oauth:token-type:jwt",
            "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
            "scope": "sql",
            "return_original_token_if_authenticated": "true",
        }

        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Accept": "application/json",
        }

        print(f"Testing direct token exchange with {url}")
        # The subject token is a bearer credential: log every field except it.
        loggable = {k: ("***" if k == "subject_token" else v) for k, v in data.items()}
        print(f"Request parameters: {loggable}")

        # Add debugging info
        claims = decode_jwt(token)
        if claims:
            print(f"Token issuer: {claims.get('iss', 'unknown')}")
            print(f"Token subject: {claims.get('sub', 'unknown')}")
            print(f"Token audience: {claims.get('aud', 'unknown')}")

            # "aud" may be a single string or a list of strings, so test
            # membership rather than equality.
            aud = claims.get('aud')
            token_audiences = aud if isinstance(aud, list) else [aud]
            if audience and audience not in token_audiences:
                print(f"WARNING: Expected audience '{audience}' doesn't match token audience '{aud}'")

        # Bound the wait so an unresponsive endpoint cannot stall the job.
        response = requests.post(url, data=data, headers=headers, timeout=30)

        print(f"Status code: {response.status_code}")
        print(f"Response headers: {dict(response.headers)}")

        if response.status_code != 200:
            # Error bodies carry the diagnostic message and no credential.
            print(f"Response content: {response.text}")
            return None

        try:
            body = json.loads(response.text)
        except json.JSONDecodeError:
            print("Failed to parse response JSON")
            return None

        # A successful body contains the exchanged access token; report which
        # fields came back without printing their values.
        print(f"Response fields: {sorted(body)}")
        return body.get("access_token")
    except Exception as e:
        # Best-effort diagnostic: any failure is reported and treated as "no token".
        print(f"Direct token exchange failed: {str(e)}")
        return None
370+
149371
def main():
150372
# Get GitHub OIDC token
151373
github_token = os.environ.get("OIDC_TOKEN")
@@ -164,20 +386,63 @@ jobs:
164386
165387
claims = decode_jwt(github_token)
166388
if claims:
389+
print("\n=== GitHub OIDC Token Claims ===")
167390
print(f"Token issuer: {claims.get('iss', 'unknown')}")
168391
print(f"Token subject: {claims.get('sub', 'unknown')}")
169392
print(f"Token audience: {claims.get('aud', 'unknown')}")
393+
print(f"Token expiration: {claims.get('exp', 'unknown')}")
394+
print(f"Repository: {claims.get('repository', 'unknown')}")
395+
print(f"Workflow ref: {claims.get('workflow_ref', 'unknown')}")
396+
print(f"Event name: {claims.get('event_name', 'unknown')}")
397+
print("===============================\n")
398+
399+
# Try token exchange with several possible audience values
400+
audience_values = [
401+
"https://github.com/databricks", # Standard audience for GitHub tokens
402+
"https://github.com", # Alternative audience
403+
None # No audience
404+
]
405+
406+
# Direct token exchange test
407+
access_token = None
408+
for audience in audience_values:
409+
print(f"\n=== Testing Direct Token Exchange (audience={audience}) ===")
410+
result = test_direct_token_exchange(host, github_token, identity_federation_client_id, audience)
411+
if result:
412+
print("Direct token exchange successful!")
413+
access_token = result
414+
token_claims = decode_jwt(result)
415+
if token_claims:
416+
print(f"Databricks token subject: {token_claims.get('sub', 'unknown')}")
417+
break
418+
print(f"Token exchange failed with audience={audience}")
419+
# Add a small delay between attempts
420+
time.sleep(1)
421+
422+
if not access_token:
423+
print("All token exchange attempts failed")
424+
print("=====================================\n")
425+
else:
426+
print("=====================================\n")
170427
171428
try:
172429
# Connect to Databricks using token federation
430+
print(f"\n=== Testing Connection via Connector ===")
173431
print(f"Connecting to Databricks at {host}{http_path}")
174-
with sql.connect(
175-
server_hostname=host,
176-
http_path=http_path,
177-
access_token=github_token,
178-
auth_type="token-federation",
179-
identity_federation_client_id=identity_federation_client_id
180-
) as connection:
432+
print(f"Using client ID: {identity_federation_client_id}")
433+
434+
connection_params = {
435+
"server_hostname": host,
436+
"http_path": http_path,
437+
"access_token": github_token,
438+
"auth_type": "token-federation",
439+
"identity_federation_client_id": identity_federation_client_id,
440+
}
441+
442+
print("Connection parameters:")
443+
print(json.dumps({k: v if k != 'access_token' else '***' for k, v in connection_params.items()}, indent=2))
444+
445+
with sql.connect(**connection_params) as connection:
181446
print("Connection established successfully")
182447
183448
# Execute a simple query
@@ -195,6 +460,7 @@ jobs:
195460
return True
196461
except Exception as e:
197462
print(f"Error connecting to Databricks: {str(e)}")
463+
print("===================================\n")
198464
sys.exit(1)
199465
200466
if __name__ == "__main__":
@@ -206,7 +472,7 @@ jobs:
206472
env:
207473
DATABRICKS_HOST_FOR_TF: ${{ github.event_name == 'workflow_dispatch' && inputs.databricks_host || secrets.DATABRICKS_HOST_FOR_TF }}
208474
DATABRICKS_HTTP_PATH_FOR_TF: ${{ github.event_name == 'workflow_dispatch' && inputs.databricks_http_path || secrets.DATABRICKS_HTTP_PATH_FOR_TF }}
209-
IDENTITY_FEDERATION_CLIENT_ID: ${{ github.event_name == 'workflow_dispatch' && inputs.identity_federation_client_id || secrets.IDENTITY_FEDERATION_CLIENT_ID }}
475+
IDENTITY_FEDERATION_CLIENT_ID_FOR_TF: ${{ github.event_name == 'workflow_dispatch' && inputs.identity_federation_client_id || secrets.IDENTITY_FEDERATION_CLIENT_ID_FOR_TF }}
210476
OIDC_TOKEN: ${{ steps.get-id-token.outputs.token }}
211477
run: |
212478
python test_github_token_federation.py

0 commit comments

Comments
 (0)