3131 Dict ,
3232 Generator ,
3333 Iterable ,
34+ Sequence ,
3435 Union ,
3536 Optional ,
3637 List ,
@@ -3010,6 +3011,7 @@ def build_insert_queries_and_params(
30103011 num_records_processed ,
30113012 replace ,
30123013 ignore ,
3014+ list_mode = False ,
30133015 ):
30143016 """
30153017 Given a list ``chunk`` of records that should be written to *this* table,
@@ -3024,24 +3026,47 @@ def build_insert_queries_and_params(
30243026 # Build a row-list ready for executemany-style flattening
30253027 values = []
30263028
3027- for record in chunk :
3028- record_values = []
3029- for key in all_columns :
3030- value = jsonify_if_needed (
3031- record .get (
3032- key ,
3033- (
3034- None
3035- if key != hash_id
3036- else hash_record (record , hash_id_columns )
3037- ),
3029+ if list_mode :
3030+ # In list mode, records are already lists of values
3031+ num_columns = len (all_columns )
3032+ has_extracts = bool (extracts )
3033+ for record in chunk :
3034+ # Pad short records with None, truncate long ones
3035+ record_len = len (record )
3036+ if record_len < num_columns :
3037+ record_values = [jsonify_if_needed (v ) for v in record ] + [None ] * (
3038+ num_columns - record_len
30383039 )
3039- )
3040- if key in extracts :
3041- extract_table = extracts [key ]
3042- value = self .db [extract_table ].lookup ({"value" : value })
3043- record_values .append (value )
3044- values .append (record_values )
3040+ else :
3041+ record_values = [jsonify_if_needed (v ) for v in record [:num_columns ]]
3042+ # Only process extracts if there are any
3043+ if has_extracts :
3044+ for i , key in enumerate (all_columns ):
3045+ if key in extracts :
3046+ record_values [i ] = self .db [extracts [key ]].lookup (
3047+ {"value" : record_values [i ]}
3048+ )
3049+ values .append (record_values )
3050+ else :
3051+ # Dict mode: original logic
3052+ for record in chunk :
3053+ record_values = []
3054+ for key in all_columns :
3055+ value = jsonify_if_needed (
3056+ record .get (
3057+ key ,
3058+ (
3059+ None
3060+ if key != hash_id
3061+ else hash_record (record , hash_id_columns )
3062+ ),
3063+ )
3064+ )
3065+ if key in extracts :
3066+ extract_table = extracts [key ]
3067+ value = self .db [extract_table ].lookup ({"value" : value })
3068+ record_values .append (value )
3069+ values .append (record_values )
30453070
30463071 columns_sql = ", " .join (f"[{ c } ]" for c in all_columns )
30473072 placeholder_expr = ", " .join (conversions .get (c , "?" ) for c in all_columns )
@@ -3157,6 +3182,7 @@ def insert_chunk(
31573182 num_records_processed ,
31583183 replace ,
31593184 ignore ,
3185+ list_mode = False ,
31603186 ) -> Optional [sqlite3 .Cursor ]:
31613187 queries_and_params = self .build_insert_queries_and_params (
31623188 extracts ,
@@ -3171,6 +3197,7 @@ def insert_chunk(
31713197 num_records_processed ,
31723198 replace ,
31733199 ignore ,
3200+ list_mode ,
31743201 )
31753202 result = None
31763203 with self .db .conn :
@@ -3200,6 +3227,7 @@ def insert_chunk(
32003227 num_records_processed ,
32013228 replace ,
32023229 ignore ,
3230+ list_mode ,
32033231 )
32043232
32053233 result = self .insert_chunk (
@@ -3216,6 +3244,7 @@ def insert_chunk(
32163244 num_records_processed ,
32173245 replace ,
32183246 ignore ,
3247+ list_mode ,
32193248 )
32203249
32213250 else :
@@ -3293,7 +3322,10 @@ def insert(
32933322
32943323 def insert_all (
32953324 self ,
3296- records ,
3325+ records : Union [
3326+ Iterable [Dict [str , Any ]],
3327+ Iterable [Sequence [Any ]],
3328+ ],
32973329 pk = DEFAULT ,
32983330 foreign_keys = DEFAULT ,
32993331 column_order = DEFAULT ,
@@ -3353,17 +3385,54 @@ def insert_all(
33533385 all_columns = []
33543386 first = True
33553387 num_records_processed = 0
3356- # Fix up any records with square braces in the column names
3357- records = fix_square_braces (records )
3358- # We can only handle a max of 999 variables in a SQL insert, so
3359- # we need to adjust the batch_size down if we have too many cols
3360- records = iter (records )
3361- # Peek at first record to count its columns:
3388+
3389+ # Detect if we're using list-based iteration or dict-based iteration
3390+ list_mode = False
3391+ column_names : List [str ] = []
3392+
3393+ # Fix up any records with square braces in the column names (only for dict mode)
3394+ # We'll handle this differently for list mode
3395+ records_iter = iter (records )
3396+
3397+ # Peek at first record to determine mode:
33623398 try :
3363- first_record = next (records )
3399+ first_record = next (records_iter )
33643400 except StopIteration :
33653401 return self # It was an empty list
3366- num_columns = len (first_record .keys ())
3402+
3403+ # Check if this is list mode or dict mode
3404+ if isinstance (first_record , (list , tuple )):
3405+ # List/tuple mode: first record should be column names
3406+ list_mode = True
3407+ if not all (isinstance (col , str ) for col in first_record ):
3408+ raise ValueError (
3409+ "When using list-based iteration, the first yielded value must be a list of column name strings"
3410+ )
3411+ column_names = list (first_record )
3412+ all_columns = column_names
3413+ num_columns = len (column_names )
3414+ # Get the actual first data record
3415+ try :
3416+ first_record = next (records_iter )
3417+ except StopIteration :
3418+ return self # Only headers, no data
3419+ if not isinstance (first_record , (list , tuple )):
3420+ raise ValueError (
3421+ "After column names list, all subsequent records must also be lists"
3422+ )
3423+ else :
3424+ # Dict mode: traditional behavior
3425+ records_iter = itertools .chain ([first_record ], records_iter )
3426+ records_iter = fix_square_braces (
3427+ cast (Iterable [Dict [str , Any ]], records_iter )
3428+ )
3429+ try :
3430+ first_record = next (records_iter )
3431+ except StopIteration :
3432+ return self
3433+ first_record = cast (Dict [str , Any ], first_record )
3434+ num_columns = len (first_record .keys ())
3435+
33673436 assert (
33683437 num_columns <= SQLITE_MAX_VARS
33693438 ), "Rows can have a maximum of {} columns" .format (SQLITE_MAX_VARS )
@@ -3373,13 +3442,18 @@ def insert_all(
33733442 if truncate and self .exists ():
33743443 self .db .execute ("DELETE FROM [{}];" .format (self .name ))
33753444 result = None
3376- for chunk in chunks (itertools .chain ([first_record ], records ), batch_size ):
3445+ for chunk in chunks (itertools .chain ([first_record ], records_iter ), batch_size ):
33773446 chunk = list (chunk )
33783447 num_records_processed += len (chunk )
33793448 if first :
33803449 if not self .exists ():
33813450 # Use the first batch to derive the table names
3382- column_types = suggest_column_types (chunk )
3451+ if list_mode :
3452+ # Convert list records to dicts for type detection
3453+ chunk_as_dicts = [dict (zip (column_names , row )) for row in chunk ]
3454+ column_types = suggest_column_types (chunk_as_dicts )
3455+ else :
3456+ column_types = suggest_column_types (chunk )
33833457 if extracts :
33843458 for col in extracts :
33853459 if col in column_types :
@@ -3399,17 +3473,24 @@ def insert_all(
33993473 extracts = extracts ,
34003474 strict = strict ,
34013475 )
3402- all_columns_set = set ()
3403- for record in chunk :
3404- all_columns_set .update (record .keys ())
3405- all_columns = list (sorted (all_columns_set ))
3406- if hash_id :
3407- all_columns .insert (0 , hash_id )
3476+ if list_mode :
3477+ # In list mode, columns are already known
3478+ all_columns = list (column_names )
3479+ if hash_id :
3480+ all_columns .insert (0 , hash_id )
3481+ else :
3482+ all_columns_set = set ()
3483+ for record in chunk :
3484+ all_columns_set .update (record .keys ())
3485+ all_columns = list (sorted (all_columns_set ))
3486+ if hash_id :
3487+ all_columns .insert (0 , hash_id )
34083488 else :
3409- for record in chunk :
3410- all_columns += [
3411- column for column in record if column not in all_columns
3412- ]
3489+ if not list_mode :
3490+ for record in chunk :
3491+ all_columns += [
3492+ column for column in record if column not in all_columns
3493+ ]
34133494
34143495 first = False
34153496
@@ -3427,6 +3508,7 @@ def insert_all(
34273508 num_records_processed ,
34283509 replace ,
34293510 ignore ,
3511+ list_mode ,
34303512 )
34313513
34323514 # If we only handled a single row populate self.last_pk
@@ -3447,14 +3529,29 @@ def insert_all(
34473529 self .last_pk = self .last_rowid
34483530 else :
34493531 # For an upsert use first_record from earlier
3450- if hash_id :
3451- self .last_pk = hash_record (first_record , hash_id_columns )
3532+ if list_mode :
3533+ # In list mode, look up pk value by column index
3534+ first_record_list = cast (Sequence [Any ], first_record )
3535+ if hash_id :
3536+ # hash_id not supported in list mode for last_pk
3537+ pass
3538+ elif isinstance (pk , str ):
3539+ pk_index = column_names .index (pk )
3540+ self .last_pk = first_record_list [pk_index ]
3541+ else :
3542+ self .last_pk = tuple (
3543+ first_record_list [column_names .index (p )] for p in pk
3544+ )
34523545 else :
3453- self .last_pk = (
3454- first_record [pk ]
3455- if isinstance (pk , str )
3456- else tuple (first_record [p ] for p in pk )
3457- )
3546+ first_record_dict = cast (Dict [str , Any ], first_record )
3547+ if hash_id :
3548+ self .last_pk = hash_record (first_record_dict , hash_id_columns )
3549+ else :
3550+ self .last_pk = (
3551+ first_record_dict [pk ]
3552+ if isinstance (pk , str )
3553+ else tuple (first_record_dict [p ] for p in pk )
3554+ )
34583555
34593556 if analyze :
34603557 self .analyze ()
@@ -3501,7 +3598,10 @@ def upsert(
35013598
35023599 def upsert_all (
35033600 self ,
3504- records ,
3601+ records : Union [
3602+ Iterable [Dict [str , Any ]],
3603+ Iterable [Sequence [Any ]],
3604+ ],
35053605 pk = DEFAULT ,
35063606 foreign_keys = DEFAULT ,
35073607 column_order = DEFAULT ,
0 commit comments