@@ -71,10 +71,16 @@ jobs:
7171 with open('src/databricks/sql/auth/token_federation.py', 'r') as f:
7272 content = f.read()
7373
74- # Add verbose request debugging
74+ # Add token debugging
7575 modified = content.replace(
76+ 'def _exchange_token(self, token, force_refresh=False):',
77+ 'def _exchange_token(self, token, force_refresh=False):\n # Debug token info\n import jwt\n try:\n decoded = jwt.decode(token, options={"verify_signature": False})\n print(f"Token issuer: {decoded.get(\'iss\')}")\n print(f"Token subject: {decoded.get(\'sub\')}")\n print(f"Token audience: {decoded.get(\'aud\') if isinstance(decoded.get(\'aud\'), str) else decoded.get(\'aud\', [])[0] if decoded.get(\'aud\') else \'\'}")\n except Exception as e:\n print(f"Unable to decode token: {str(e)}")'
78+ )
79+
80+ # Add verbose request debugging
81+ modified = modified.replace(
7682 'try:\n # Make the token exchange request',
77- 'try:\n import urllib.parse\n # Debug full request\n print(f"Token endpoint: {self.token_endpoint}")\n print(f"Request parameters: {urllib.parse.urlencode(params)}")\n print(f"Request headers: {headers}")\n # Make the token exchange request'
83+ 'try:\n import urllib.parse\n # Debug full request\n print(f"Connecting to Databricks at {self.host}")\n print(f" Token endpoint: {self.token_endpoint}")\n print(f"Request parameters: {urllib.parse.urlencode(params)}")\n print(f"Request headers: {headers}")\n # Make the token exchange request'
7884 )
7985
8086 # Add verbose response debugging
8692 # Improve error handling
8793 modified = modified.replace(
8894 'except RequestException as e:',
89- 'except RequestException as e:\n if hasattr(e, "response") and e.response:\n print(f"Error response status: {e.response.status_code}")\n print(f"Error response headers: {dict(e.response.headers)}")\n print(f"Error response text: {e.response.text}")'
95+ 'except RequestException as e:\n print(f"Failed to perform token exchange: {str(e)}")\n if hasattr(e, "response") and e.response:\n print(f"Error response status: {e.response.status_code}")\n print(f"Error response headers: {dict(e.response.headers)}")\n print(f"Error response text: {e.response.text}")'
9096 )
9197
9298 with open('src/databricks/sql/auth/token_federation.py', 'w') as f:
@@ -98,9 +104,73 @@ jobs:
98104
99105 chmod +x patch_for_debugging.py
100106
107+ - name : Install PyJWT for token debugging
108+ run : pip install pyjwt
109+
101110 - name : Apply debugging patches to token_federation.py
102111 run : python patch_for_debugging.py
103112
113+ - name : Create audience fix patch script
114+ run : |
115+ cat > patch_for_audience_fix.py << 'EOF'
116+ #!/usr/bin/env python3
117+
import re
import textwrap

# File that receives the injected audience diagnostics.
_TARGET_FILE = 'src/databricks/sql/auth/token_federation.py'

# Matches the method header and captures its leading indentation, so the
# injected statements can be placed one level deeper than the ``def``.
_EXCHANGE_DEF = re.compile(
    r'^(?P<indent>[ \t]*)def _exchange_token\(self, token, force_refresh=False\):[ \t]*$',
    re.MULTILINE,
)

# Statements inserted at the top of ``_exchange_token``. They only print
# diagnostics; the token is never modified, because that would invalidate
# its signature.
_AUDIENCE_DEBUG = textwrap.dedent('''\
    # Additional handling for different audience formats
    import jwt
    try:
        decoded = jwt.decode(token, options={"verify_signature": False})
        aud = decoded.get("aud")

        # Check if aud is a list and convert to string if needed
        if isinstance(aud, list) and len(aud) > 0:
            aud = aud[0]

        # Print audience for debugging
        print(f"Original token audience : {aud}")

        expected_audience = "https://github.com/databricks"
        if aud != expected_audience:
            print(f"WARNING : Token audience '{aud}' doesn't match expected audience '{expected_audience}'")
    except Exception as e:
        print(f"Audience debug error : {str(e)}")
''')


def _inject_after_def(match):
    """Return the matched ``def`` line followed by the indented debug snippet."""
    # Assumes the target file indents with four spaces per level.
    body_indent = match.group('indent') + '    '
    snippet = textwrap.indent(_AUDIENCE_DEBUG, body_indent).rstrip('\n')
    return match.group(0) + '\n' + snippet


def patch_code():
    """Insert audience diagnostics at the top of ``_exchange_token``.

    Reads the target file relative to the current working directory, adds the
    debug statements directly after every matching method header, and writes
    the result back to the same file.

    Raises:
        RuntimeError: If no ``_exchange_token`` header is found, so the
            workflow fails visibly instead of silently running unpatched code.
    """
    with open(_TARGET_FILE, 'r', encoding='utf-8') as f:
        content = f.read()

    modified, replaced = _EXCHANGE_DEF.subn(_inject_after_def, content)
    if replaced == 0:
        raise RuntimeError(
            f"Could not find '_exchange_token' in {_TARGET_FILE}; nothing was patched"
        )

    with open(_TARGET_FILE, 'w', encoding='utf-8') as f:
        f.write(modified)


if __name__ == "__main__":
    patch_code()
167+ EOF
168+
169+ chmod +x patch_for_audience_fix.py
170+
171+ - name : Apply audience fix patches
172+ run : python patch_for_audience_fix.py
173+
104174 - name : Get GitHub OIDC token
105175 id : get-id-token
106176 uses : actions/github-script@v7
@@ -110,6 +180,106 @@ jobs:
110180 core.setSecret(token)
111181 core.setOutput('token', token)
112182
# Prints the claims of the GitHub OIDC token for debugging. The token is
# passed through the environment and read with os.environ, because the shell
# does not expand variables inside a quoted here-document.
- name: Decode and display OIDC token claims
  env:
    OIDC_TOKEN: ${{ steps.get-id-token.outputs.token }}
  run: |
    echo "Decoding GitHub OIDC token claims..."
    python - <<'PY'
    import base64
    import json
    import os

    token = os.environ.get("OIDC_TOKEN", "")

    # Parse the token
    try:
        header, payload, signature = token.split(".")

        # JWT segments are unpadded base64url: restore the padding and use
        # the URL-safe alphabet so "-" and "_" decode correctly.
        payload_padding = payload + "=" * (-len(payload) % 4)

        # Decode the payload
        decoded_payload = base64.urlsafe_b64decode(payload_padding).decode("utf-8")
        claims = json.loads(decoded_payload)

        # Print important claims
        print("\n=== GITHUB OIDC TOKEN CLAIMS ===")
        print(f"Issuer (iss): {claims.get('iss')}")
        print(f"Subject (sub): {claims.get('sub')}")
        print(f"Audience (aud): {claims.get('aud')}")
        print(f"Repository: {claims.get('repository')}")
        print(f"Repository owner: {claims.get('repository_owner')}")
        print(f"Event name: {claims.get('event_name')}")
        print(f"Ref: {claims.get('ref')}")
        print(f"Workflow ref: {claims.get('workflow_ref')}")
        print("\n=== FULL CLAIMS ===")
        print(json.dumps(claims, indent=2))
        print("===========================\n")
    except Exception as e:
        # Best-effort diagnostics: report the problem without failing the step.
        print(f"Failed to decode token: {str(e)}")
    PY
220+
# Performs the token exchange directly with curl so the raw HTTP exchange can
# be inspected. This step is diagnostic only and always exits 0.
- name: Debug token exchange with curl
  env:
    DATABRICKS_HOST: ${{ github.event_name == 'workflow_dispatch' && inputs.databricks_host || secrets.DATABRICKS_HOST_FOR_TF }}
    IDENTITY_FEDERATION_CLIENT_ID: ${{ github.event_name == 'workflow_dispatch' && inputs.identity_federation_client_id || secrets.IDENTITY_FEDERATION_CLIENT_ID_FOR_TF }}
    OIDC_TOKEN: ${{ steps.get-id-token.outputs.token }}
  run: |
    echo "Attempting direct token exchange with curl..."
    echo "Host: $DATABRICKS_HOST"
    echo "Client ID: $IDENTITY_FEDERATION_CLIENT_ID"

    # Debug token claims before making the request. The token is read from
    # the environment instead of being spliced into the Python source.
    echo "Token claims:"
    python3 - <<'PY' || echo "Unable to decode token claims"
    import base64
    import json
    import os

    token = os.environ.get("OIDC_TOKEN", "")
    parts = token.split(".")
    if len(parts) >= 2:
        # JWT segments are unpadded base64url.
        padding = "=" * (-len(parts[1]) % 4)
        decoded_bytes = base64.urlsafe_b64decode(parts[1] + padding)
        decoded_str = decoded_bytes.decode("utf-8")
        claims = json.loads(decoded_str)
        print(f"Issuer: {claims.get('iss', 'unknown')}")
        print(f"Subject: {claims.get('sub', 'unknown')}")
        print(f"Audience: {claims.get('aud', 'unknown')}")
    else:
        print("Invalid token format")
    PY

    # Print request details (except the token)
    echo "Creating token exchange request..."
    echo "Request URL : https://$DATABRICKS_HOST/oidc/v1/token"
    echo "Request data : client_id=$IDENTITY_FEDERATION_CLIENT_ID&subject_token=REDACTED&subject_token_type=urn:ietf:params:oauth:token-type:jwt&grant_type=urn:ietf:params:oauth:grant-type:token-exchange&scope=sql"

    # Make the request with detailed info. --data-urlencode encodes each
    # value, and "|| echo" keeps the default "bash -e" shell from aborting
    # the step when curl itself fails.
    echo "Sending request..."
    response=$(curl -v -s -X POST "https://$DATABRICKS_HOST/oidc/v1/token" \
      --data-urlencode "client_id=$IDENTITY_FEDERATION_CLIENT_ID" \
      --data-urlencode "subject_token=$OIDC_TOKEN" \
      --data-urlencode "subject_token_type=urn:ietf:params:oauth:token-type:jwt" \
      --data-urlencode "grant_type=urn:ietf:params:oauth:grant-type:token-exchange" \
      --data-urlencode "scope=sql" \
      -H "Content-Type: application/x-www-form-urlencoded" \
      -H "Accept: application/json" \
      2>&1) || echo "curl exited with a non-zero status"

    # Extract and display results, hiding any issued access token
    echo "Response:"
    echo "$response" | sed -E 's/("access_token"[[:space:]]*:[[:space:]]*")[^"]*"/\1REDACTED"/'

    # Extract HTTP status if possible (last status line wins)
    status_code=$(echo "$response" | grep -o "< HTTP/[0-9.]* [0-9]*" | tail -n 1 | grep -o "[0-9]*$" || echo "unknown")
    echo "HTTP Status Code : $status_code"

    # Don't fail the workflow if curl fails
    exit 0
282+
113283 - name : Create test script
114284 run : |
115285 cat > test_github_token_federation.py << 'EOF'
@@ -127,7 +297,9 @@ jobs:
127297 import sys
128298 import json
129299 import base64
300+ import requests
130301 from databricks import sql
302+ import time
131303
132304 def decode_jwt(token):
133305 """Decode and return the claims from a JWT token."""
@@ -137,6 +309,7 @@ jobs:
137309 raise ValueError("Invalid JWT format")
138310
139311 payload = parts[1]
312+ # Add padding if needed
140313 padding = '=' * (4 - len(payload) % 4)
141314 payload += padding
142315
@@ -146,6 +319,55 @@ jobs:
146319 print(f"Failed to decode token: {str(e)}")
147320 return None
148321
def test_direct_token_exchange(host, token, client_id, audience=None, timeout=30):
    """Directly test token exchange with the Databricks API.

    Posts an OAuth token-exchange request to ``https://{host}/oidc/v1/token``
    and prints request and response details for debugging.

    Args:
        host: Hostname used to build the token endpoint URL (no scheme).
        token: JWT to exchange; sent as ``subject_token``.
        client_id: Value sent as ``client_id``.
        audience: Optional expected audience. It is only compared with the
            token's ``aud`` claim to print a warning; it is not sent with
            the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``access_token`` field of the response when the endpoint answers
        with HTTP 200 and a parseable JSON body, otherwise ``None``. Errors
        are printed, never raised.
    """
    try:
        url = f"https://{host}/oidc/v1/token"
        data = {
            "client_id": client_id,
            "subject_token": token,
            "subject_token_type": "urn:ietf:params:oauth:token-type:jwt",
            "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
            "scope": "sql",
            "return_original_token_if_authenticated": "true",
        }

        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Accept": "application/json",
        }

        print(f"Testing direct token exchange with {url}")
        # Keep the raw token out of the job log.
        printable = {
            key: "***" if key == "subject_token" else value
            for key, value in data.items()
        }
        print(f"Request parameters: {printable}")

        # Add debugging info
        claims = decode_jwt(token)
        if claims:
            token_aud = claims.get('aud')
            print(f"Token issuer: {claims.get('iss', 'unknown')}")
            print(f"Token subject: {claims.get('sub', 'unknown')}")
            print(f"Token audience: {claims.get('aud', 'unknown')}")

            # The "aud" claim may be a single string or a list of strings.
            token_audiences = token_aud if isinstance(token_aud, list) else [token_aud]
            if audience and audience not in token_audiences:
                print(f"WARNING: Expected audience '{audience}' doesn't match token audience '{token_aud}'")

        # Without a timeout an unresponsive endpoint would hang the job.
        response = requests.post(url, data=data, headers=headers, timeout=timeout)

        print(f"Status code: {response.status_code}")
        print(f"Response headers: {dict(response.headers)}")

        if response.status_code != 200:
            print(f"Response content: {response.text}")
            return None

        try:
            body = json.loads(response.text)
        except json.JSONDecodeError:
            print("Failed to parse response JSON")
            return None

        # A successful body carries the access token: list field names only.
        print(f"Response fields: {sorted(body)}")
        return body.get("access_token")
    except Exception as e:
        # Best-effort diagnostic: report and let the caller continue.
        print(f"Direct token exchange failed: {str(e)}")
        return None
370+
149371 def main():
150372 # Get GitHub OIDC token
151373 github_token = os.environ.get("OIDC_TOKEN")
@@ -164,20 +386,63 @@ jobs:
164386
165387 claims = decode_jwt(github_token)
166388 if claims:
389+ print("\n=== GitHub OIDC Token Claims ===")
167390 print(f"Token issuer: {claims.get('iss', 'unknown')}")
168391 print(f"Token subject: {claims.get('sub', 'unknown')}")
169392 print(f"Token audience: {claims.get('aud', 'unknown')}")
393+ print(f"Token expiration: {claims.get('exp', 'unknown')}")
394+ print(f"Repository: {claims.get('repository', 'unknown')}")
395+ print(f"Workflow ref: {claims.get('workflow_ref', 'unknown')}")
396+ print(f"Event name: {claims.get('event_name', 'unknown')}")
397+ print("===============================\n")
398+
399+ # Try token exchange with several possible audience values
400+ audience_values = [
401+ "https://github.com/databricks", # Standard audience for GitHub tokens
402+ "https://github.com", # Alternative audience
403+ None # No audience
404+ ]
405+
406+ # Direct token exchange test
407+ access_token = None
408+ for audience in audience_values:
409+ print(f"\n=== Testing Direct Token Exchange (audience={audience}) ===")
410+ result = test_direct_token_exchange(host, github_token, identity_federation_client_id, audience)
411+ if result:
412+ print("Direct token exchange successful!")
413+ access_token = result
414+ token_claims = decode_jwt(result)
415+ if token_claims:
416+ print(f"Databricks token subject: {token_claims.get('sub', 'unknown')}")
417+ break
418+ print(f"Token exchange failed with audience={audience}")
419+ # Add a small delay between attempts
420+ time.sleep(1)
421+
422+ if not access_token:
423+ print("All token exchange attempts failed")
424+ print("=====================================\n")
425+ else:
426+ print("=====================================\n")
170427
171428 try:
172429 # Connect to Databricks using token federation
430+ print(f"\n=== Testing Connection via Connector ===")
173431 print(f"Connecting to Databricks at {host}{http_path}")
174- with sql.connect(
175- server_hostname=host,
176- http_path=http_path,
177- access_token=github_token,
178- auth_type="token-federation",
179- identity_federation_client_id=identity_federation_client_id
180- ) as connection:
432+ print(f"Using client ID: {identity_federation_client_id}")
433+
434+ connection_params = {
435+ "server_hostname": host,
436+ "http_path": http_path,
437+ "access_token": github_token,
438+ "auth_type": "token-federation",
439+ "identity_federation_client_id": identity_federation_client_id,
440+ }
441+
442+ print("Connection parameters:")
443+ print(json.dumps({k: v if k != 'access_token' else '***' for k, v in connection_params.items()}, indent=2))
444+
445+ with sql.connect(**connection_params) as connection:
181446 print("Connection established successfully")
182447
183448 # Execute a simple query
@@ -195,6 +460,7 @@ jobs:
195460 return True
196461 except Exception as e:
197462 print(f"Error connecting to Databricks: {str(e)}")
463+ print("===================================\n")
198464 sys.exit(1)
199465
200466 if __name__ == "__main__":
@@ -206,7 +472,7 @@ jobs:
206472 env :
207473 DATABRICKS_HOST_FOR_TF : ${{ github.event_name == 'workflow_dispatch' && inputs.databricks_host || secrets.DATABRICKS_HOST_FOR_TF }}
208474 DATABRICKS_HTTP_PATH_FOR_TF : ${{ github.event_name == 'workflow_dispatch' && inputs.databricks_http_path || secrets.DATABRICKS_HTTP_PATH_FOR_TF }}
209- IDENTITY_FEDERATION_CLIENT_ID : ${{ github.event_name == 'workflow_dispatch' && inputs.identity_federation_client_id || secrets.IDENTITY_FEDERATION_CLIENT_ID }}
475+ IDENTITY_FEDERATION_CLIENT_ID_FOR_TF : ${{ github.event_name == 'workflow_dispatch' && inputs.identity_federation_client_id || secrets.IDENTITY_FEDERATION_CLIENT_ID_FOR_TF }}
210476 OIDC_TOKEN : ${{ steps.get-id-token.outputs.token }}
211477 run : |
212478 python test_github_token_federation.py
0 commit comments