Skip to content

Commit 31ecb56

Browse files
committed
[ADD] util/update_table_from_dict
A recurrent challenge in writing upgrade scripts is that of updating values in a table based on some form of already available mapping from the id (or another identifier) to the new value. This is often addressed with an iterative solution in the form: ```python for key, value in mapping.items(): cr.execute( """ UPDATE table SET col = %s WHERE key_col = %s """, [value, key], ) ``` or in a more efficient (only issuing a single query) but hacky way: ```python cr.execute( """ UPDATE table SET col = (%s::jsonb)->>(key_col::text) WHERE key_col = ANY(%s) """, [json.dumps(mapping), list(mapping)], ) ``` The former is inefficient for big mappings, and the latter often requires some comments at review time to get it right. This commit introduces a util meant to make it easier to efficiently perform such updates.
1 parent f1dd8f8 commit 31ecb56

File tree

2 files changed

+145
-1
lines changed

2 files changed

+145
-1
lines changed

src/base/tests/test_util.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -881,6 +881,63 @@ def test_parallel_execute_retry_on_serialization_failure(self):
881881
cr.execute(util.format_query(cr, "SELECT 1 FROM {}", TEST_TABLE_NAME))
882882
self.assertFalse(cr.rowcount)
883883

884+
def test_update_table_from_dict(self):
    """Check that update_table_from_dict touches exactly the mapped rows/columns."""
    TEST_TABLE_NAME = "_upgrade_update_table_from_dict_test_table"
    N_ROWS = 10

    cr = self._get_cr()

    # Fresh fixture table: id, col1 and col2 all start equal, 1..N_ROWS.
    cr.execute(
        util.format_query(
            cr,
            """
            DROP TABLE IF EXISTS {table};

            CREATE TABLE {table} (
                id SERIAL PRIMARY KEY,
                col1 INTEGER,
                col2 INTEGER
            );

            INSERT INTO {table} (col1, col2) SELECT v, v FROM GENERATE_SERIES(1, %s) as v;
            """
            % N_ROWS,
            table=TEST_TABLE_NAME,
        )
    )

    # Double col1 of the odd-id rows only.
    # `row_id` (not `id`) avoids shadowing the builtin.
    mapping = {row_id: {"col1": row_id * 2} for row_id in range(1, N_ROWS + 1, 2)}
    util.update_table_from_dict(cr, TEST_TABLE_NAME, mapping)

    # Each query counts rows violating one expectation; every count must be zero.
    checks = [
        # no unintended column is affected
        "SELECT COUNT(*) FROM {table} WHERE col2 != id",
        # no unintended row is affected
        "SELECT COUNT(*) FROM {table} WHERE col1 != id AND MOD(id, 2) = 0",
        # all intended rows and columns are affected
        "SELECT COUNT(*) FROM {table} WHERE col1 = id AND MOD(id, 2) = 1",
    ]
    for check in checks:
        cr.execute(util.format_query(cr, check, table=TEST_TABLE_NAME))
        # pass the query as the assertion message for a useful failure report
        self.assertEqual(cr.fetchone()[0], 0, check)
940+
884941
def test_create_column_with_fk(self):
885942
cr = self.env.cr
886943
self.assertFalse(util.column_exists(cr, "res_partner", "_test_lang_id"))

src/util/pg.py

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"""Utility functions for interacting with PostgreSQL."""
33

44
import collections
5+
import json
56
import logging
67
import os
78
import re
@@ -43,7 +44,7 @@
4344

4445
from .exceptions import MigrationError, SleepyDeveloperError
4546
from .helpers import _validate_table, model_of_table
46-
from .misc import Sentinel, log_progress, version_gte
47+
from .misc import Sentinel, chunks, log_progress, version_gte
4748

4849
_logger = logging.getLogger(__name__)
4950

@@ -1621,3 +1622,89 @@ def create_id_sequence(cr, table, set_as_default=True):
16211622
table=table_sql,
16221623
)
16231624
)
1625+
1626+
1627+
def update_table_from_dict(cr, table, mapping, key_col="id", bucket_size=DEFAULT_BUCKET_SIZE):
    """
    Update table's rows based on mapping.

    Efficiently updates rows in a table by mapping an identifier column (`key_col`) value to the new values for the provided set of columns.

    .. example::

        .. code-block:: python

            util.update_table_from_dict(
                cr,
                "account_move",
                {
                    1: {"closing_return_id": 2, "always_tax_eligible": True},
                    2: {"closing_return_id": 3, "always_tax_eligible": False},
                },
            )

    :param str table: the table to update
    :param dict[any, dict[str, any]] mapping: mapping of `key_col` identifiers to maps of column names to their new value

        .. example::

            .. code-block:: python

                mapping = {
                    1: {"col1": 123, "col2": "foo"},
                    2: {"col1": 456, "col2": "bar"},
                }

        .. warning::

            All maps should have the exact same set of keys (column names). The following
            example would behave unpredictably:

            .. code-block:: python

                # WRONG
                mapping = {
                    1: {"col1": 123, "col2": "foo"},
                    2: {"col1": 456},
                }

            Either resulting in `col2` updates being ignored or setting it to NULL for row 2.

    :param str key_col: The column to match the key against (`id` by default)
    :param int bucket_size: maximum number of rows to update per single query
    """
    if not mapping:
        return

    _validate_table(table)

    # The first value's keys define the updated column set; per the warning in
    # the docstring, every value in `mapping` is expected to carry the same keys.
    column_names = list(next(iter(mapping.values())).keys())

    # Two-phase query templating:
    #   1. `format_query` fills in the SQL *identifiers* ({table}, {columns_list},
    #      {values_list}, {key_col});
    #   2. `mogrify` substitutes the column-name *literals* into the
    #      `m.value->>%s` lookups, while `%%s` survives that pass as a single
    #      `%s` placeholder for the JSONB payload bound at execute() time.
    # JSONB object keys are always text, hence the `::varchar` cast on `key_col`;
    # each extracted value (also text) is cast back to its column's own SQL type.
    query = cr.mogrify(
        format_query(
            cr,
            """
            UPDATE {table} t
               SET ({columns_list}) = ROW({values_list})
              FROM JSONB_EACH(%%s) m
             WHERE t.{key_col}::varchar = m.key
            """,
            table=table,
            columns_list=ColumnList.from_unquoted(cr, column_names),
            values_list=sql.SQL(", ").join(
                sql.SQL("(m.value->>%s)::{}").format(sql.SQL(column_type(cr, table, col))) for col in column_names
            ),
            key_col=key_col,
        ),
        column_names,
    )

    # Small mappings run as a single query.  The 1.1 slack presumably avoids
    # splitting a mapping that barely exceeds `bucket_size` into a full bucket
    # plus a tiny remainder -- TODO confirm intent.
    if len(mapping) <= 1.1 * bucket_size:
        cr.execute(query, [json.dumps(mapping)])
    else:
        # Split into bucket-sized sub-mappings, each serialized as its own JSONB
        # payload, and run them as independent queries via `parallel_execute`.
        parallel_execute(
            cr,
            [
                cr.mogrify(query, [json.dumps(mapping_chunk)]).decode()
                for mapping_chunk in chunks(mapping.items(), bucket_size, fmt=dict)
            ],
        )

0 commit comments

Comments
 (0)