
Commit e3ab180

[ADD] util/update_table_from_dict
A recurrent challenge in writing upgrade scripts is updating values in a table based on some already available mapping from the id (or another identifier) to the new value. This is often addressed with an iterative solution of the form:

```python
for key, value in mapping.items():
    cr.execute(
        """
        UPDATE table
           SET col = %s
         WHERE key_col = %s
        """,
        [value, key],
    )
```

or in a more efficient (only issuing a single query) but hacky way:

```python
cr.execute(
    """
    UPDATE table
       SET col = (%s::jsonb)->>(key_col::text)
     WHERE key_col = ANY(%s)
    """,
    [json.dumps(mapping), list(mapping)],
)
```

The former is inefficient for big mappings, and the latter often requires some comments at review time to get it right. This commit introduces a util meant to make it easier to perform such updates efficiently.
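For comparison, here is a minimal sketch (not from the commit itself) of how the same single-column update reads with the new helper. The placeholder names `table`, `col` and `key_col` are reused from the snippets above; `update_table_from_dict` expects a nested `{identifier: {column: new value}}` mapping, so the flat mapping is wrapped accordingly:

```python
# Sketch only: placeholder table/column names reused from the snippets above.
# The util takes a mapping of identifier -> {column: new value};
# key_col defaults to "id" and may point to any identifier column.
util.update_table_from_dict(
    cr,
    "table",
    {key: {"col": value} for key, value in mapping.items()},
    key_col="key_col",
)
```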
1 parent f1dd8f8 commit e3ab180

2 files changed (+142, -1 lines)


src/base/tests/test_util.py

Lines changed: 54 additions & 0 deletions
```diff
@@ -881,6 +881,60 @@ def test_parallel_execute_retry_on_serialization_failure(self):
         cr.execute(util.format_query(cr, "SELECT 1 FROM {}", TEST_TABLE_NAME))
         self.assertFalse(cr.rowcount)
 
+    def test_update_table_from_dict(self):
+        TEST_TABLE_NAME = "_upgrade_update_table_from_dict_test_table"
+        N_ROWS = 10
+
+        cr = self._get_cr()
+
+        cr.execute(
+            util.format_query(
+                cr,
+                """
+                DROP TABLE IF EXISTS {table};
+
+                CREATE TABLE {table} (
+                    id SERIAL PRIMARY KEY,
+                    col1 INTEGER,
+                    col2 INTEGER
+                );
+
+                INSERT INTO {table} (col1, col2) SELECT v, v FROM GENERATE_SERIES(1, %s) as v;
+                """
+                % N_ROWS,
+                table=TEST_TABLE_NAME,
+            )
+        )
+        mapping = {id: {"col1": id * 2} for id in range(1, N_ROWS + 1, 2)}
+        util.update_table_from_dict(cr, TEST_TABLE_NAME, mapping)
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE col2 != id",
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise unintended column is affected
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE col1 != id AND MOD(id, 2) = 0",
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise unintended rows are affected
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE col1 != 2 * id AND MOD(id, 2) = 1",
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise intended rows are not affected
+
     def test_create_column_with_fk(self):
         cr = self.env.cr
         self.assertFalse(util.column_exists(cr, "res_partner", "_test_lang_id"))
```

src/util/pg.py

Lines changed: 88 additions & 1 deletion
```diff
@@ -2,6 +2,7 @@
 """Utility functions for interacting with PostgreSQL."""
 
 import collections
+import json
 import logging
 import os
 import re
@@ -43,7 +44,7 @@
 
 from .exceptions import MigrationError, SleepyDeveloperError
 from .helpers import _validate_table, model_of_table
-from .misc import Sentinel, log_progress, version_gte
+from .misc import Sentinel, chunks, log_progress, version_gte
 
 _logger = logging.getLogger(__name__)
 
@@ -1621,3 +1622,89 @@ def create_id_sequence(cr, table, set_as_default=True):
             table=table_sql,
         )
     )
+
+
+def update_table_from_dict(cr, table, mapping, key_col="id", bucket_size=DEFAULT_BUCKET_SIZE):
+    """
+    Update table's rows based on mapping.
+
+    Efficiently updates rows in a table by mapping an identifier column (`key_col`) value to the new values for the provided set of columns.
+
+    .. example::
+
+        .. code-block:: python
+
+            util.update_table_from_dict(
+                cr,
+                "account_move",
+                {
+                    1: {"closing_return_id": 2, "always_tax_eligible": True},
+                    2: {"closing_return_id": 3, "always_tax_eligible": False},
+                },
+            )
+
+    :param str table: the table to update
+    :param dict[any, dict[str, any]] mapping: mapping of `key_col` identifiers to maps of column names to their new value
+
+        .. example::
+
+            .. code-block:: python
+
+                mapping = {
+                    1: {"col1": 123, "col2": "foo"},
+                    2: {"col1": 456, "col2": "bar"},
+                }
+
+        .. warning::
+
+            All maps should have the exact same set of keys (column names). The following
+            example would behave unpredictably:
+
+            .. code-block:: python
+
+                # WRONG
+                mapping = {
+                    1: {"col1": 123, "col2": "foo"},
+                    2: {"col1": 456},
+                }
+
+            Either resulting in `col2` updates being ignored or setting it to NULL for row 2.
+
+    :param str key_col: The column to match the key against (`id` by default)
+    :param int bucket_size: maximum number of rows to update per single query
+    """
+    if not mapping:
+        return
+
+    _validate_table(table)
+
+    column_names = list(next(iter(mapping.values())).keys())
+    query = cr.mogrify(
+        format_query(
+            cr,
+            """
+            UPDATE {table} t
+               SET ({columns_list}) = ROW({values_list})
+              FROM JSONB_EACH(%%s) m
+             WHERE t.{key_col}::varchar = m.key
+            """,
+            table=table,
+            columns_list=ColumnList.from_unquoted(cr, column_names),
+            values_list=sql.SQL(", ").join(
+                sql.SQL("(m.value->>%s)::{}").format(sql.SQL(column_type(cr, table, col))) for col in column_names
+            ),
+            key_col=key_col,
+        ),
+        column_names,
+    )
+
+    if len(mapping) <= 1.1 * bucket_size:
+        cr.execute(query, [json.dumps(mapping)])
+    else:
+        parallel_execute(
+            cr,
+            [
+                cr.mogrify(query, [json.dumps(mapping_chunk)]).decode()
+                for mapping_chunk in chunks(mapping.items(), bucket_size, fmt=dict)
+            ],
+        )
```
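To make the mechanism concrete, here is a rough sketch of the query the helper ends up issuing for a two-column mapping. The table name, column names and casts are hypothetical (`column_type` supplies the real casts at runtime), and `cr` is an open database cursor as elsewhere in this codebase:

```python
import json

# Hypothetical example: "some_table" with an integer col1 and a varchar col2.
mapping = {
    1: {"col1": 123, "col2": "foo"},
    2: {"col1": 456, "col2": "bar"},
}

# Roughly what update_table_from_dict() executes after mogrifying the column
# names and casts into the query template: the whole mapping travels as a
# single jsonb parameter and JSONB_EACH() expands it into (key, value) rows.
cr.execute(
    """
    UPDATE some_table t
       SET (col1, col2) = ROW((m.value->>'col1')::integer, (m.value->>'col2')::varchar)
      FROM JSONB_EACH(%s) m
     WHERE t.id::varchar = m.key
    """,
    [json.dumps(mapping)],
)
```

The `key_col` value is compared as text (`::varchar`) because JSON object keys are always strings once the mapping is serialized.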
