From 877d29db7f81686a33a7cc0181e7965a88bada1a Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 18 Oct 2025 12:53:19 +0100 Subject: [PATCH 1/6] :sparkles: add duplicate index name handling for SQLite to prevent collisions --- src/mysql_to_sqlite3/transporter.py | 44 +++++++++++++++++++++++------ src/mysql_to_sqlite3/types.py | 2 ++ 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/mysql_to_sqlite3/transporter.py b/src/mysql_to_sqlite3/transporter.py index 306fa10..5d0a2ea 100644 --- a/src/mysql_to_sqlite3/transporter.py +++ b/src/mysql_to_sqlite3/transporter.py @@ -146,6 +146,11 @@ def __init__(self, **kwargs: Unpack[MySQLtoSQLiteParams]) -> None: self._sqlite_json1_extension_enabled = not self._json_as_text and self._check_sqlite_json1_extension_enabled() + # Track seen SQLite index names to detect duplicates when prefixing is disabled + self._seen_sqlite_index_names: t.Set[str] = set() + # Collected duplicate index names that were skipped due to global name collision + self._skipped_duplicate_sqlite_indices: t.List[str] = [] + try: _mysql_connection = mysql.connector.connect( user=self._mysql_user, @@ -523,16 +528,27 @@ def _build_create_table_sql(self, table_name: str) -> str: columns=", ".join(f'"{column}"' for column in columns.split(",")) ) else: - indices += """CREATE {unique} INDEX IF NOT EXISTS "{name}" ON "{table}" ({columns});""".format( - unique="UNIQUE" if index["unique"] in {1, "1"} else "", - name=( - f"{table_name}_{index_name}" - if (table_collisions > 0 or self._prefix_indices) - else index_name - ), - table=table_name, - columns=", ".join(f'"{column}"' for column in columns.split(",")), + # Determine the SQLite index name, considering table name collisions and prefix option + proposed_index_name = ( + f"{table_name}_{index_name}" + if (table_collisions > 0 or self._prefix_indices) + else index_name ) + # If prefixing is disabled, ensure index names are unique across the whole SQLite database + if not self._prefix_indices and proposed_index_name in self._seen_sqlite_index_names: + # Skip duplicate index and remember for a consolidated notice at the end + self._skipped_duplicate_sqlite_indices.append(proposed_index_name) + else: + # Record first occurrence and emit CREATE INDEX + self._seen_sqlite_index_names.add(proposed_index_name) + indices += ( + """CREATE {unique} INDEX IF NOT EXISTS "{name}" ON "{table}" ({columns});""".format( + unique="UNIQUE" if index["unique"] in {1, "1"} else "", + name=proposed_index_name, + table=table_name, + columns=", ".join(f'"{column}"' for column in columns.split(",")), + ) + ) sql += primary sql = sql.rstrip(", ") @@ -774,6 +790,16 @@ def transfer(self) -> None: # re-enable foreign key checking once done transferring self._sqlite_cur.execute("PRAGMA foreign_keys=ON") + # If any indices were skipped due to duplicate names, inform the user once at the end + if self._skipped_duplicate_sqlite_indices: + unique_skipped = sorted(set(self._skipped_duplicate_sqlite_indices)) + self._logger.warning( + "Skipped creating the following INDEX, an index with the same index name already exists:\n %s\n" + "Please use the `-K` option to prefix indices with their corresponding tables. " + "This ensures that their names remain unique across the SQLite database.", + "\n ".join(unique_skipped), + ) + if self._vacuum: self._logger.info("Vacuuming created SQLite database file.\nThis might take a while.") self._sqlite_cur.execute("VACUUM") diff --git a/src/mysql_to_sqlite3/types.py b/src/mysql_to_sqlite3/types.py index 9bc536f..5bc8dfb 100644 --- a/src/mysql_to_sqlite3/types.py +++ b/src/mysql_to_sqlite3/types.py @@ -81,3 +81,5 @@ class MySQLtoSQLiteAttributes: _vacuum: bool _without_data: bool _without_foreign_keys: bool + _seen_sqlite_index_names: t.Set[str] + _skipped_duplicate_sqlite_indexes: t.List[str] From 1eac50547c1c86d0c29a326172bc0d1e38b01b5f Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 18 Oct 2025 13:13:26 +0100 Subject: [PATCH 2/6] :sparkles: implement unique index name generation for SQLite to prevent collisions --- src/mysql_to_sqlite3/transporter.py | 61 +++++++++++++++++------------ src/mysql_to_sqlite3/types.py | 3 +- tests/unit/mysql_to_sqlite3_test.py | 25 ++++++++++++ 3 files changed, 63 insertions(+), 26 deletions(-) diff --git a/src/mysql_to_sqlite3/transporter.py b/src/mysql_to_sqlite3/transporter.py index 5d0a2ea..802399b 100644 --- a/src/mysql_to_sqlite3/transporter.py +++ b/src/mysql_to_sqlite3/transporter.py @@ -146,10 +146,10 @@ def __init__(self, **kwargs: Unpack[MySQLtoSQLiteParams]) -> None: self._sqlite_json1_extension_enabled = not self._json_as_text and self._check_sqlite_json1_extension_enabled() - # Track seen SQLite index names to detect duplicates when prefixing is disabled + # Track seen SQLite index names to generate unique names when prefixing is disabled self._seen_sqlite_index_names: t.Set[str] = set() - # Collected duplicate index names that were skipped due to global name collision - self._skipped_duplicate_sqlite_indices: t.List[str] = [] + # Counter for duplicate index names to assign numeric suffixes (name_2, name_3, ...) + self._sqlite_index_name_counters: t.Dict[str, int] = {} try: _mysql_connection = mysql.connector.connect( @@ -414,6 +414,28 @@ def _check_sqlite_json1_extension_enabled(self) -> bool: except sqlite3.Error: return False + def _get_unique_index_name(self, base_name: str) -> str: + """Return a unique SQLite index name based on base_name. + + If base_name has not been used yet, it is returned as-is and recorded. If it has been + used, a numeric suffix is appended starting from 2 (e.g., name_2, name_3, ...), and the + chosen name is recorded as used. This behavior is only intended for cases where index + prefixing is not enabled and SQLite requires global uniqueness for index names. + """ + if base_name not in self._seen_sqlite_index_names: + self._seen_sqlite_index_names.add(base_name) + return base_name + # Base name already seen — assign next available counter + next_num = self._sqlite_index_name_counters.get(base_name, 2) + candidate = f"{base_name}_{next_num}" + while candidate in self._seen_sqlite_index_names: + next_num += 1 + candidate = f"{base_name}_{next_num}" + # Record chosen candidate and bump counter for the base name + self._seen_sqlite_index_names.add(candidate) + self._sqlite_index_name_counters[base_name] = next_num + 1 + return candidate + def _build_create_table_sql(self, table_name: str) -> str: sql: str = f'CREATE TABLE IF NOT EXISTS "{table_name}" (' primary: str = "" @@ -534,21 +556,19 @@ def _build_create_table_sql(self, table_name: str) -> str: if (table_collisions > 0 or self._prefix_indices) else index_name ) - # If prefixing is disabled, ensure index names are unique across the whole SQLite database - if not self._prefix_indices and proposed_index_name in self._seen_sqlite_index_names: - # Skip duplicate index and remember for a consolidated notice at the end - self._skipped_duplicate_sqlite_indices.append(proposed_index_name) + # Ensure index name is unique across the whole SQLite database when prefixing is disabled + if not self._prefix_indices: + unique_index_name = self._get_unique_index_name(proposed_index_name) else: - # Record first occurrence and emit CREATE INDEX - self._seen_sqlite_index_names.add(proposed_index_name) - indices += ( - """CREATE {unique} INDEX IF NOT EXISTS "{name}" ON "{table}" ({columns});""".format( - unique="UNIQUE" if index["unique"] in {1, "1"} else "", - name=proposed_index_name, - table=table_name, - columns=", ".join(f'"{column}"' for column in columns.split(",")), - ) + unique_index_name = proposed_index_name + indices += ( + """CREATE {unique} INDEX IF NOT EXISTS "{name}" ON "{table}" ({columns});""".format( + unique="UNIQUE" if index["unique"] in {1, "1"} else "", + name=unique_index_name, + table=table_name, + columns=", ".join(f'"{column}"' for column in columns.split(",")), ) + ) sql += primary sql = sql.rstrip(", ") @@ -790,15 +810,6 @@ def transfer(self) -> None: # re-enable foreign key checking once done transferring self._sqlite_cur.execute("PRAGMA foreign_keys=ON") - # If any indices were skipped due to duplicate names, inform the user once at the end - if self._skipped_duplicate_sqlite_indices: - unique_skipped = sorted(set(self._skipped_duplicate_sqlite_indices)) - self._logger.warning( - "Skipped creating the following INDEX, an index with the same index name already exists:\n %s\n" - "Please use the `-K` option to prefix indices with their corresponding tables. " - "This ensures that their names remain unique across the SQLite database.", - "\n ".join(unique_skipped), - ) if self._vacuum: self._logger.info("Vacuuming created SQLite database file.\nThis might take a while.") diff --git a/src/mysql_to_sqlite3/types.py b/src/mysql_to_sqlite3/types.py index 5bc8dfb..eb0d488 100644 --- a/src/mysql_to_sqlite3/types.py +++ b/src/mysql_to_sqlite3/types.py @@ -81,5 +81,6 @@ class MySQLtoSQLiteAttributes: _vacuum: bool _without_data: bool _without_foreign_keys: bool + # Tracking of SQLite index names and counters to ensure uniqueness when prefixing is disabled _seen_sqlite_index_names: t.Set[str] - _skipped_duplicate_sqlite_indexes: t.List[str] + _sqlite_index_name_counters: t.Dict[str, int] diff --git a/tests/unit/mysql_to_sqlite3_test.py b/tests/unit/mysql_to_sqlite3_test.py index 6e0c4d7..de99137 100644 --- a/tests/unit/mysql_to_sqlite3_test.py +++ b/tests/unit/mysql_to_sqlite3_test.py @@ -579,3 +579,28 @@ def fetchmany(self, size: int = 1) -> t.Any: with pytest.raises((mysql.connector.Error, sqlite3.Error)): proc._transfer_table_data(table_name, sql) + + + +def test_get_unique_index_name_suffixing_sequence() -> None: + from unittest.mock import patch + + # Create an instance without running the real constructor + with patch.object(MySQLtoSQLite, "__init__", return_value=None): + t = MySQLtoSQLite() + # minimal attributes required by the helper + t._seen_sqlite_index_names = set() + t._sqlite_index_name_counters = {} + t._prefix_indices = False + + # First occurrence: no suffix + assert t._get_unique_index_name("idx_page_id") == "idx_page_id" + # Second occurrence: _2 + assert t._get_unique_index_name("idx_page_id") == "idx_page_id_2" + # Third occurrence: _3 + assert t._get_unique_index_name("idx_page_id") == "idx_page_id_3" + + # A different base name should start without suffix + assert t._get_unique_index_name("idx_user_id") == "idx_user_id" + # And then suffix from 2 + assert t._get_unique_index_name("idx_user_id") == "idx_user_id_2" From 989780b36d57593cbe23c129e12b03c706a9afd6 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 18 Oct 2025 13:19:22 +0100 Subject: [PATCH 3/6] :art: refactor index creation logic for improved readability and maintainability --- src/mysql_to_sqlite3/transporter.py | 13 +++++-------- tests/unit/mysql_to_sqlite3_test.py | 1 - 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/mysql_to_sqlite3/transporter.py b/src/mysql_to_sqlite3/transporter.py index 802399b..6919183 100644 --- a/src/mysql_to_sqlite3/transporter.py +++ b/src/mysql_to_sqlite3/transporter.py @@ -561,13 +561,11 @@ def _build_create_table_sql(self, table_name: str) -> str: unique_index_name = self._get_unique_index_name(proposed_index_name) else: unique_index_name = proposed_index_name - indices += ( - """CREATE {unique} INDEX IF NOT EXISTS "{name}" ON "{table}" ({columns});""".format( - unique="UNIQUE" if index["unique"] in {1, "1"} else "", - name=unique_index_name, - table=table_name, - columns=", ".join(f'"{column}"' for column in columns.split(",")), - ) + indices += """CREATE {unique} INDEX IF NOT EXISTS "{name}" ON "{table}" ({columns});""".format( + unique="UNIQUE" if index["unique"] in {1, "1"} else "", + name=unique_index_name, + table=table_name, + columns=", ".join(f'"{column}"' for column in columns.split(",")), ) sql += primary @@ -810,7 +808,6 @@ def transfer(self) -> None: # re-enable foreign key checking once done transferring self._sqlite_cur.execute("PRAGMA foreign_keys=ON") - if self._vacuum: self._logger.info("Vacuuming created SQLite database file.\nThis might take a while.") self._sqlite_cur.execute("VACUUM") diff --git a/tests/unit/mysql_to_sqlite3_test.py b/tests/unit/mysql_to_sqlite3_test.py index de99137..c8330fd 100644 --- a/tests/unit/mysql_to_sqlite3_test.py +++ b/tests/unit/mysql_to_sqlite3_test.py @@ -581,7 +581,6 @@ def fetchmany(self, size: int = 1) -> t.Any: proc._transfer_table_data(table_name, sql) - def test_get_unique_index_name_suffixing_sequence() -> None: from unittest.mock import patch From ad2cb10dbeae5dab6d80e6fcead477e2b3ee3fc0 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 18 Oct 2025 13:23:18 +0100 Subject: [PATCH 4/6] :rotating_light: update pylint disable codes for improved linting accuracy --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index b84c02f..e377116 100644 --- a/tox.ini +++ b/tox.ini @@ -112,4 +112,4 @@ import-order-style = pycharm application-import-names = flake8 [pylint] -disable = C0209,C0301,C0411,R,W0107,W0622 \ No newline at end of file +disable = C0209,C0301,C0411,R,W0107,W0622,C0103 \ No newline at end of file From 9e40fb07909b0c48e72110b36198ccff9edd33a9 Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 18 Oct 2025 13:26:47 +0100 Subject: [PATCH 5/6] :loud_sound: add logging for index renaming to ensure uniqueness in SQLite --- src/mysql_to_sqlite3/transporter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mysql_to_sqlite3/transporter.py b/src/mysql_to_sqlite3/transporter.py index 6919183..9a537b0 100644 --- a/src/mysql_to_sqlite3/transporter.py +++ b/src/mysql_to_sqlite3/transporter.py @@ -434,6 +434,11 @@ def _get_unique_index_name(self, base_name: str) -> str: # Record chosen candidate and bump counter for the base name self._seen_sqlite_index_names.add(candidate) self._sqlite_index_name_counters[base_name] = next_num + 1 + self._logger.info( + 'Index "%s" renamed to "%s" to ensure uniqueness across the SQLite database.', + base_name, + candidate, + ) return candidate def _build_create_table_sql(self, table_name: str) -> str: From 1c45dd6a0c46d733400bc7f774131a7a30c35e3d Mon Sep 17 00:00:00 2001 From: Klemen Tusar Date: Sat, 18 Oct 2025 13:50:06 +0100 Subject: [PATCH 6/6] :white_check_mark: add unit test for unique index name suffixing in MySQLtoSQLite --- tests/unit/mysql_to_sqlite3_test.py | 24 ------------------------ tests/unit/test_transporter.py | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/tests/unit/mysql_to_sqlite3_test.py b/tests/unit/mysql_to_sqlite3_test.py index c8330fd..6e0c4d7 100644 --- a/tests/unit/mysql_to_sqlite3_test.py +++ b/tests/unit/mysql_to_sqlite3_test.py @@ -579,27 +579,3 @@ def fetchmany(self, size: int = 1) -> t.Any: with pytest.raises((mysql.connector.Error, sqlite3.Error)): proc._transfer_table_data(table_name, sql) - - -def test_get_unique_index_name_suffixing_sequence() -> None: - from unittest.mock import patch - - # Create an instance without running the real constructor - with patch.object(MySQLtoSQLite, "__init__", return_value=None): - t = MySQLtoSQLite() - # minimal attributes required by the helper - t._seen_sqlite_index_names = set() - t._sqlite_index_name_counters = {} - t._prefix_indices = False - - # First occurrence: no suffix - assert t._get_unique_index_name("idx_page_id") == "idx_page_id" - # Second occurrence: _2 - assert t._get_unique_index_name("idx_page_id") == "idx_page_id_2" - # Third occurrence: _3 - assert t._get_unique_index_name("idx_page_id") == "idx_page_id_3" - - # A different base name should start without suffix - assert t._get_unique_index_name("idx_user_id") == "idx_user_id" - # And then suffix from 2 - assert t._get_unique_index_name("idx_user_id") == "idx_user_id_2" diff --git a/tests/unit/test_transporter.py b/tests/unit/test_transporter.py index 9c91268..3c9d4fa 100644 --- a/tests/unit/test_transporter.py +++ b/tests/unit/test_transporter.py @@ -44,6 +44,27 @@ def test_decode_column_type_with_non_string_non_bytes(self) -> None: assert MySQLtoSQLite._decode_column_type(None) == "None" assert MySQLtoSQLite._decode_column_type(True) == "True" + def test_get_unique_index_name_suffixing_sequence(self) -> None: + with patch.object(MySQLtoSQLite, "__init__", return_value=None): + instance = MySQLtoSQLite() + # minimal attributes required by the helper + instance._seen_sqlite_index_names = set() + instance._sqlite_index_name_counters = {} + instance._prefix_indices = False + instance._logger = MagicMock() + + # First occurrence: no suffix + assert instance._get_unique_index_name("idx_page_id") == "idx_page_id" + # Second occurrence: _2 + assert instance._get_unique_index_name("idx_page_id") == "idx_page_id_2" + # Third occurrence: _3 + assert instance._get_unique_index_name("idx_page_id") == "idx_page_id_3" + + # A different base name should start without suffix + assert instance._get_unique_index_name("idx_user_id") == "idx_user_id" + # And then suffix from 2 + assert instance._get_unique_index_name("idx_user_id") == "idx_user_id_2" + @patch("sqlite3.connect") def test_check_sqlite_json1_extension_enabled_success(self, mock_connect: MagicMock) -> None: """Test _check_sqlite_json1_extension_enabled when JSON1 is enabled."""