 def test_sea_multi_chunk_with_cloud_fetch(requested_row_count=5000):
     """
     Test executing a query that generates multiple chunks using cloud fetch.
-
+
     Args:
         requested_row_count: Number of rows to request in the query
-
+
     Returns:
         bool: True if the test passed, False otherwise
     """
     server_hostname = os.environ.get("DATABRICKS_SERVER_HOSTNAME")
     http_path = os.environ.get("DATABRICKS_HTTP_PATH")
     access_token = os.environ.get("DATABRICKS_TOKEN")
     catalog = os.environ.get("DATABRICKS_CATALOG")
-
+
     # Create output directory for test results
     output_dir = Path("test_results")
     output_dir.mkdir(exist_ok=True)
-
+
     # Files to store results
     rows_file = output_dir / "cloud_fetch_rows.csv"
     stats_file = output_dir / "cloud_fetch_stats.json"
@@ -50,9 +50,7 @@ def test_sea_multi_chunk_with_cloud_fetch(requested_row_count=5000):

     try:
         # Create connection with cloud fetch enabled
-        logger.info(
-            "Creating connection for query execution with cloud fetch enabled"
-        )
+        logger.info("Creating connection for query execution with cloud fetch enabled")
         connection = Connection(
             server_hostname=server_hostname,
             http_path=http_path,
@@ -76,46 +74,50 @@ def test_sea_multi_chunk_with_cloud_fetch(requested_row_count=5000):
             concat('value_', repeat('a', 10000)) as test_value
         FROM range(1, {requested_row_count} + 1) AS t(id)
         """
-
-        logger.info(f"Executing query with cloud fetch to generate {requested_row_count} rows")
+
+        logger.info(
+            f"Executing query with cloud fetch to generate {requested_row_count} rows"
+        )
         start_time = time.time()
         cursor.execute(query)
-
+
         # Fetch all rows
         rows = cursor.fetchall()
         actual_row_count = len(rows)
         end_time = time.time()
         execution_time = end_time - start_time
-
+
         logger.info(f"Query executed in {execution_time:.2f} seconds")
-        logger.info(f"Requested {requested_row_count} rows, received {actual_row_count} rows")
-
+        logger.info(
+            f"Requested {requested_row_count} rows, received {actual_row_count} rows"
+        )
+
         # Write rows to CSV file for inspection
         logger.info(f"Writing rows to {rows_file}")
-        with open(rows_file, 'w', newline='') as f:
+        with open(rows_file, "w", newline="") as f:
             writer = csv.writer(f)
-            writer.writerow(['id', 'value_length'])  # Header
-
+            writer.writerow(["id", "value_length"])  # Header
+
             # Extract IDs to check for duplicates and missing values
             row_ids = []
             for row in rows:
                 row_id = row[0]
                 value_length = len(row[1])
                 writer.writerow([row_id, value_length])
                 row_ids.append(row_id)
-
+
         # Verify row count
         success = actual_row_count == requested_row_count
-
+
         # Check for duplicate IDs
         unique_ids = set(row_ids)
         duplicate_count = len(row_ids) - len(unique_ids)
-
+
         # Check for missing IDs
         expected_ids = set(range(1, requested_row_count + 1))
         missing_ids = expected_ids - unique_ids
         extra_ids = unique_ids - expected_ids
-
+
         # Write statistics to JSON file
         stats = {
             "requested_row_count": requested_row_count,
@@ -124,56 +126,64 @@ def test_sea_multi_chunk_with_cloud_fetch(requested_row_count=5000):
             "duplicate_count": duplicate_count,
             "missing_ids_count": len(missing_ids),
             "extra_ids_count": len(extra_ids),
-            "missing_ids": list(missing_ids)[:100] if missing_ids else [],  # Limit to first 100 for readability
-            "extra_ids": list(extra_ids)[:100] if extra_ids else [],  # Limit to first 100 for readability
-            "success": success and duplicate_count == 0 and len(missing_ids) == 0 and len(extra_ids) == 0
+            "missing_ids": list(missing_ids)[:100]
+            if missing_ids
+            else [],  # Limit to first 100 for readability
+            "extra_ids": list(extra_ids)[:100]
+            if extra_ids
+            else [],  # Limit to first 100 for readability
+            "success": success
+            and duplicate_count == 0
+            and len(missing_ids) == 0
+            and len(extra_ids) == 0,
         }
-
-        with open(stats_file, 'w') as f:
+
+        with open(stats_file, "w") as f:
             json.dump(stats, f, indent=2)
-
+
         # Log detailed results
         if duplicate_count > 0:
             logger.error(f"❌ FAILED: Found {duplicate_count} duplicate row IDs")
             success = False
         else:
             logger.info("✅ PASSED: No duplicate row IDs found")
-
+
         if missing_ids:
             logger.error(f"❌ FAILED: Missing {len(missing_ids)} expected row IDs")
             if len(missing_ids) <= 10:
                 logger.error(f"Missing IDs: {sorted(list(missing_ids))}")
             success = False
         else:
             logger.info("✅ PASSED: All expected row IDs present")
-
+
         if extra_ids:
             logger.error(f"❌ FAILED: Found {len(extra_ids)} unexpected row IDs")
             if len(extra_ids) <= 10:
                 logger.error(f"Extra IDs: {sorted(list(extra_ids))}")
             success = False
         else:
             logger.info("✅ PASSED: No unexpected row IDs found")
-
+
         if actual_row_count == requested_row_count:
             logger.info("✅ PASSED: Row count matches requested count")
         else:
-            logger.error(f"❌ FAILED: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}")
+            logger.error(
+                f"❌ FAILED: Row count mismatch. Expected {requested_row_count}, got {actual_row_count}"
+            )
             success = False
-
+
         # Close resources
         cursor.close()
         connection.close()
         logger.info("Successfully closed SEA session")
-
+
         logger.info(f"Test results written to {rows_file} and {stats_file}")
         return success

     except Exception as e:
-        logger.error(
-            f"Error during SEA multi-chunk test with cloud fetch: {str(e)}"
-        )
+        logger.error(f"Error during SEA multi-chunk test with cloud fetch: {str(e)}")
         import traceback
+
         logger.error(traceback.format_exc())
         return False
@@ -193,31 +203,33 @@ def main():
         )
         logger.error("Please set these variables before running the tests.")
         sys.exit(1)
-
+
     # Get row count from command line or use default
     requested_row_count = 10000
-
+
     if len(sys.argv) > 1:
         try:
             requested_row_count = int(sys.argv[1])
         except ValueError:
             logger.error(f"Invalid row count: {sys.argv[1]}")
             logger.error("Please provide a valid integer for row count.")
             sys.exit(1)
-
+
     logger.info(f"Testing with {requested_row_count} rows")
-
+
     # Run the multi-chunk test with cloud fetch
     success = test_sea_multi_chunk_with_cloud_fetch(requested_row_count)
-
+
     # Report results
     if success:
-        logger.info("✅ TEST PASSED: Multi-chunk cloud fetch test completed successfully")
+        logger.info(
+            "✅ TEST PASSED: Multi-chunk cloud fetch test completed successfully"
+        )
         sys.exit(0)
     else:
         logger.error("❌ TEST FAILED: Multi-chunk cloud fetch test encountered errors")
         sys.exit(1)


 if __name__ == "__main__":
-    main()
+    main()
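
For reference, a minimal sketch of driving this test directly from Python instead of through the command-line entry point. The module name test_sea_multi_chunk is an assumption (use whatever file name this script is saved under), and the environment variable values are placeholders for the credentials the test reads at the top of test_sea_multi_chunk_with_cloud_fetch.

import os

# Hypothetical module name -- adjust to the actual file name of this script.
from test_sea_multi_chunk import test_sea_multi_chunk_with_cloud_fetch

# The test pulls its connection settings from these environment variables;
# the values below are placeholders, not real credentials.
os.environ.setdefault("DATABRICKS_SERVER_HOSTNAME", "example.cloud.databricks.com")
os.environ.setdefault("DATABRICKS_HTTP_PATH", "/sql/1.0/warehouses/abc123")
os.environ.setdefault("DATABRICKS_TOKEN", "dapi-placeholder")
os.environ.setdefault("DATABRICKS_CATALOG", "main")

# Request enough rows that the ~10 KB-per-row result set spills into
# multiple chunks, exercising the cloud fetch path end to end.
passed = test_sea_multi_chunk_with_cloud_fetch(requested_row_count=10000)
print("passed:", passed)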