Skip to content

Commit 7ffd505

Browse files
committed
table.insert_all() and table.upsert_all() now take generators of lists/tuples
Closes #672, PR #673
1 parent dfb2dbe commit 7ffd505

File tree

5 files changed

+494
-60
lines changed

5 files changed

+494
-60
lines changed

docs/changelog.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
Changelog
55
===========
66

7+
.. _unreleased:
8+
9+
Unreleased
10+
----------
11+
12+
- The ``table.insert_all()`` and ``table.upsert_all()`` methods can now accept an iterator of lists or tuples as an alternative to an iterator of dictionaries. The first item must be a list or tuple of column name strings; each subsequent item is a list or tuple of values for those columns. See :ref:`python_api_insert_lists` for details. (:issue:`672`)
13+
714
.. _v4_0a0:
815

916
4.0a0 (2025-05-08)

docs/python-api.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,35 @@ You can delete all the existing rows in the table before inserting the new recor
810810

811811
Pass ``analyze=True`` to run ``ANALYZE`` against the table after inserting the new records.
812812

813+
.. _python_api_insert_lists:
814+
815+
Inserting data from a list or tuple iterator
816+
--------------------------------------------
817+
818+
As an alternative to passing an iterator of dictionaries, you can pass an iterator of lists or tuples. The first item yielded by the iterator must be a list or tuple of string column names, and subsequent items should be lists or tuples of values:
819+
820+
.. code-block:: python
821+
822+
db["creatures"].insert_all([
823+
["name", "species"],
824+
["Cleo", "dog"],
825+
["Lila", "chicken"],
826+
["Bants", "chicken"],
827+
])
828+
829+
This also works with generators:
830+
831+
.. code-block:: python
832+
833+
def creatures():
834+
yield "id", "name", "city"
835+
yield 1, "Cleo", "San Francisco"
836+
yield 2, "Lila", "Los Angeles"
837+
838+
db["creatures"].insert_all(creatures())
839+
840+
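Tuples and lists are both supported. A row with fewer values than there are column names is padded with ``None`` for the missing columns, and any values beyond the number of column names are ignored.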
841+
813842
.. _python_api_insert_replace:
814843

815844
Insert-replacing data

sqlite_utils/db.py

Lines changed: 146 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
Dict,
3232
Generator,
3333
Iterable,
34+
Sequence,
3435
Union,
3536
Optional,
3637
List,
@@ -3010,6 +3011,7 @@ def build_insert_queries_and_params(
30103011
num_records_processed,
30113012
replace,
30123013
ignore,
3014+
list_mode=False,
30133015
):
30143016
"""
30153017
Given a list ``chunk`` of records that should be written to *this* table,
@@ -3024,24 +3026,47 @@ def build_insert_queries_and_params(
30243026
# Build a row-list ready for executemany-style flattening
30253027
values = []
30263028

3027-
for record in chunk:
3028-
record_values = []
3029-
for key in all_columns:
3030-
value = jsonify_if_needed(
3031-
record.get(
3032-
key,
3033-
(
3034-
None
3035-
if key != hash_id
3036-
else hash_record(record, hash_id_columns)
3037-
),
3029+
if list_mode:
3030+
# In list mode, records are already lists of values
3031+
num_columns = len(all_columns)
3032+
has_extracts = bool(extracts)
3033+
for record in chunk:
3034+
# Pad short records with None, truncate long ones
3035+
record_len = len(record)
3036+
if record_len < num_columns:
3037+
record_values = [jsonify_if_needed(v) for v in record] + [None] * (
3038+
num_columns - record_len
30383039
)
3039-
)
3040-
if key in extracts:
3041-
extract_table = extracts[key]
3042-
value = self.db[extract_table].lookup({"value": value})
3043-
record_values.append(value)
3044-
values.append(record_values)
3040+
else:
3041+
record_values = [jsonify_if_needed(v) for v in record[:num_columns]]
3042+
# Only process extracts if there are any
3043+
if has_extracts:
3044+
for i, key in enumerate(all_columns):
3045+
if key in extracts:
3046+
record_values[i] = self.db[extracts[key]].lookup(
3047+
{"value": record_values[i]}
3048+
)
3049+
values.append(record_values)
3050+
else:
3051+
# Dict mode: original logic
3052+
for record in chunk:
3053+
record_values = []
3054+
for key in all_columns:
3055+
value = jsonify_if_needed(
3056+
record.get(
3057+
key,
3058+
(
3059+
None
3060+
if key != hash_id
3061+
else hash_record(record, hash_id_columns)
3062+
),
3063+
)
3064+
)
3065+
if key in extracts:
3066+
extract_table = extracts[key]
3067+
value = self.db[extract_table].lookup({"value": value})
3068+
record_values.append(value)
3069+
values.append(record_values)
30453070

30463071
columns_sql = ", ".join(f"[{c}]" for c in all_columns)
30473072
placeholder_expr = ", ".join(conversions.get(c, "?") for c in all_columns)
@@ -3157,6 +3182,7 @@ def insert_chunk(
31573182
num_records_processed,
31583183
replace,
31593184
ignore,
3185+
list_mode=False,
31603186
) -> Optional[sqlite3.Cursor]:
31613187
queries_and_params = self.build_insert_queries_and_params(
31623188
extracts,
@@ -3171,6 +3197,7 @@ def insert_chunk(
31713197
num_records_processed,
31723198
replace,
31733199
ignore,
3200+
list_mode,
31743201
)
31753202
result = None
31763203
with self.db.conn:
@@ -3200,6 +3227,7 @@ def insert_chunk(
32003227
num_records_processed,
32013228
replace,
32023229
ignore,
3230+
list_mode,
32033231
)
32043232

32053233
result = self.insert_chunk(
@@ -3216,6 +3244,7 @@ def insert_chunk(
32163244
num_records_processed,
32173245
replace,
32183246
ignore,
3247+
list_mode,
32193248
)
32203249

32213250
else:
@@ -3293,7 +3322,10 @@ def insert(
32933322

32943323
def insert_all(
32953324
self,
3296-
records,
3325+
records: Union[
3326+
Iterable[Dict[str, Any]],
3327+
Iterable[Sequence[Any]],
3328+
],
32973329
pk=DEFAULT,
32983330
foreign_keys=DEFAULT,
32993331
column_order=DEFAULT,
@@ -3353,17 +3385,54 @@ def insert_all(
33533385
all_columns = []
33543386
first = True
33553387
num_records_processed = 0
3356-
# Fix up any records with square braces in the column names
3357-
records = fix_square_braces(records)
3358-
# We can only handle a max of 999 variables in a SQL insert, so
3359-
# we need to adjust the batch_size down if we have too many cols
3360-
records = iter(records)
3361-
# Peek at first record to count its columns:
3388+
3389+
# Detect if we're using list-based iteration or dict-based iteration
3390+
list_mode = False
3391+
column_names: List[str] = []
3392+
3393+
# Fix up any records with square braces in the column names (only for dict mode)
3394+
# We'll handle this differently for list mode
3395+
records_iter = iter(records)
3396+
3397+
# Peek at first record to determine mode:
33623398
try:
3363-
first_record = next(records)
3399+
first_record = next(records_iter)
33643400
except StopIteration:
33653401
return self # It was an empty list
3366-
num_columns = len(first_record.keys())
3402+
3403+
# Check if this is list mode or dict mode
3404+
if isinstance(first_record, (list, tuple)):
3405+
# List/tuple mode: first record should be column names
3406+
list_mode = True
3407+
if not all(isinstance(col, str) for col in first_record):
3408+
raise ValueError(
3409+
"When using list-based iteration, the first yielded value must be a list of column name strings"
3410+
)
3411+
column_names = list(first_record)
3412+
all_columns = column_names
3413+
num_columns = len(column_names)
3414+
# Get the actual first data record
3415+
try:
3416+
first_record = next(records_iter)
3417+
except StopIteration:
3418+
return self # Only headers, no data
3419+
if not isinstance(first_record, (list, tuple)):
3420+
raise ValueError(
3421+
"After column names list, all subsequent records must also be lists"
3422+
)
3423+
else:
3424+
# Dict mode: traditional behavior
3425+
records_iter = itertools.chain([first_record], records_iter)
3426+
records_iter = fix_square_braces(
3427+
cast(Iterable[Dict[str, Any]], records_iter)
3428+
)
3429+
try:
3430+
first_record = next(records_iter)
3431+
except StopIteration:
3432+
return self
3433+
first_record = cast(Dict[str, Any], first_record)
3434+
num_columns = len(first_record.keys())
3435+
33673436
assert (
33683437
num_columns <= SQLITE_MAX_VARS
33693438
), "Rows can have a maximum of {} columns".format(SQLITE_MAX_VARS)
@@ -3373,13 +3442,18 @@ def insert_all(
33733442
if truncate and self.exists():
33743443
self.db.execute("DELETE FROM [{}];".format(self.name))
33753444
result = None
3376-
for chunk in chunks(itertools.chain([first_record], records), batch_size):
3445+
for chunk in chunks(itertools.chain([first_record], records_iter), batch_size):
33773446
chunk = list(chunk)
33783447
num_records_processed += len(chunk)
33793448
if first:
33803449
if not self.exists():
33813450
# Use the first batch to derive the table names
3382-
column_types = suggest_column_types(chunk)
3451+
if list_mode:
3452+
# Convert list records to dicts for type detection
3453+
chunk_as_dicts = [dict(zip(column_names, row)) for row in chunk]
3454+
column_types = suggest_column_types(chunk_as_dicts)
3455+
else:
3456+
column_types = suggest_column_types(chunk)
33833457
if extracts:
33843458
for col in extracts:
33853459
if col in column_types:
@@ -3399,17 +3473,24 @@ def insert_all(
33993473
extracts=extracts,
34003474
strict=strict,
34013475
)
3402-
all_columns_set = set()
3403-
for record in chunk:
3404-
all_columns_set.update(record.keys())
3405-
all_columns = list(sorted(all_columns_set))
3406-
if hash_id:
3407-
all_columns.insert(0, hash_id)
3476+
if list_mode:
3477+
# In list mode, columns are already known
3478+
all_columns = list(column_names)
3479+
if hash_id:
3480+
all_columns.insert(0, hash_id)
3481+
else:
3482+
all_columns_set = set()
3483+
for record in chunk:
3484+
all_columns_set.update(record.keys())
3485+
all_columns = list(sorted(all_columns_set))
3486+
if hash_id:
3487+
all_columns.insert(0, hash_id)
34083488
else:
3409-
for record in chunk:
3410-
all_columns += [
3411-
column for column in record if column not in all_columns
3412-
]
3489+
if not list_mode:
3490+
for record in chunk:
3491+
all_columns += [
3492+
column for column in record if column not in all_columns
3493+
]
34133494

34143495
first = False
34153496

@@ -3427,6 +3508,7 @@ def insert_all(
34273508
num_records_processed,
34283509
replace,
34293510
ignore,
3511+
list_mode,
34303512
)
34313513

34323514
# If we only handled a single row populate self.last_pk
@@ -3447,14 +3529,29 @@ def insert_all(
34473529
self.last_pk = self.last_rowid
34483530
else:
34493531
# For an upsert use first_record from earlier
3450-
if hash_id:
3451-
self.last_pk = hash_record(first_record, hash_id_columns)
3532+
if list_mode:
3533+
# In list mode, look up pk value by column index
3534+
first_record_list = cast(Sequence[Any], first_record)
3535+
if hash_id:
3536+
# hash_id not supported in list mode for last_pk
3537+
pass
3538+
elif isinstance(pk, str):
3539+
pk_index = column_names.index(pk)
3540+
self.last_pk = first_record_list[pk_index]
3541+
else:
3542+
self.last_pk = tuple(
3543+
first_record_list[column_names.index(p)] for p in pk
3544+
)
34523545
else:
3453-
self.last_pk = (
3454-
first_record[pk]
3455-
if isinstance(pk, str)
3456-
else tuple(first_record[p] for p in pk)
3457-
)
3546+
first_record_dict = cast(Dict[str, Any], first_record)
3547+
if hash_id:
3548+
self.last_pk = hash_record(first_record_dict, hash_id_columns)
3549+
else:
3550+
self.last_pk = (
3551+
first_record_dict[pk]
3552+
if isinstance(pk, str)
3553+
else tuple(first_record_dict[p] for p in pk)
3554+
)
34583555

34593556
if analyze:
34603557
self.analyze()
@@ -3501,7 +3598,10 @@ def upsert(
35013598

35023599
def upsert_all(
35033600
self,
3504-
records,
3601+
records: Union[
3602+
Iterable[Dict[str, Any]],
3603+
Iterable[Sequence[Any]],
3604+
],
35053605
pk=DEFAULT,
35063606
foreign_keys=DEFAULT,
35073607
column_order=DEFAULT,

0 commit comments

Comments
 (0)