Skip to content

Commit ed37a82

Browse files
authored
✨ transfer MySQL views as native SQLite views (#110)
1 parent 17d2d60 commit ed37a82

14 files changed

+760
-8
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ dependencies = [
4646
"python-dateutil>=2.9.0.post0",
4747
"python-slugify>=7.0.0",
4848
"simplejson>=3.19.0",
49+
"sqlglot>=27.27.0",
4950
"tqdm>=4.65.0",
5051
"tabulate",
5152
"typing-extensions; python_version < \"3.11\"",

requirements_dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ simplejson>=3.19.1
1717
types-simplejson
1818
sqlalchemy>=2.0.0
1919
sqlalchemy-utils
20+
sqlglot>=27.27.0
2021
types-sqlalchemy-utils
2122
tox
2223
tqdm>=4.65.0

src/mysql_to_sqlite3/cli.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@
142142
)
143143
@click.option("-l", "--log-file", type=click.Path(), help="Log file")
144144
@click.option("--json-as-text", is_flag=True, help="Transfer JSON columns as TEXT.")
145+
@click.option(
146+
"-T",
147+
"--mysql-views-as-tables",
148+
is_flag=True,
149+
help="Materialize MySQL VIEWs as SQLite tables (legacy behavior).",
150+
)
145151
@click.option(
146152
"-V",
147153
"--vacuum",
@@ -182,6 +188,7 @@ def cli(
182188
chunk: int,
183189
log_file: t.Union[str, "os.PathLike[t.Any]"],
184190
json_as_text: bool,
191+
mysql_views_as_tables: bool,
185192
vacuum: bool,
186193
use_buffered_cursors: bool,
187194
quiet: bool,
@@ -230,6 +237,7 @@ def cli(
230237
mysql_ssl_disabled=skip_ssl,
231238
chunk=chunk,
232239
json_as_text=json_as_text,
240+
views_as_views=not mysql_views_as_tables,
233241
vacuum=vacuum,
234242
buffered=use_buffered_cursors,
235243
log_file=log_file,

src/mysql_to_sqlite3/transporter.py

Lines changed: 165 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from mysql.connector import CharacterSet, errorcode
1616
from mysql.connector.abstracts import MySQLConnectionAbstract
1717
from mysql.connector.types import RowItemType
18+
from sqlglot import exp, parse_one
19+
from sqlglot.errors import ParseError
1820
from tqdm import tqdm, trange
1921

2022

@@ -120,6 +122,8 @@ def __init__(self, **kwargs: Unpack[MySQLtoSQLiteParams]) -> None:
120122

121123
self._quiet = bool(kwargs.get("quiet", False))
122124

125+
self._views_as_views = bool(kwargs.get("views_as_views", True))
126+
123127
self._sqlite_strict = bool(kwargs.get("sqlite_strict", False))
124128

125129
self._logger = self._setup_logger(log_file=kwargs.get("log_file") or None, quiet=self._quiet)
@@ -637,6 +641,7 @@ def _create_table(self, table_name: str, attempting_reconnect: bool = False) ->
637641
if not attempting_reconnect:
638642
self._logger.warning("Connection to MySQL server lost.\nAttempting to reconnect.")
639643
self._create_table(table_name, True)
644+
return
640645
else:
641646
self._logger.warning("Connection to MySQL server lost.\nReconnection attempt aborted.")
642647
raise
@@ -650,6 +655,130 @@ def _create_table(self, table_name: str, attempting_reconnect: bool = False) ->
650655
self._logger.error("SQLite failed creating table %s: %s", table_name, err)
651656
raise
652657

658+
@staticmethod
def _mysql_viewdef_to_sqlite(
    view_select_sql: str,
    view_name: str,
    schema_name: t.Optional[str] = None,
    keep_schema: bool = False,
) -> str:
    """
    Convert a MySQL VIEW_DEFINITION (a SELECT ...) to a SQLite CREATE VIEW statement.

    If keep_schema is False and schema_name is provided, strip qualifiers like `example`.table.
    If keep_schema is True, you must ATTACH the SQLite database as that schema name before using the view.

    :param view_select_sql: the SELECT statement that defines the MySQL view.
    :param view_name: name of the view to create; emitted as a double-quoted identifier.
    :param schema_name: MySQL schema whose qualifiers are removed from table references.
    :param keep_schema: when True, table references keep their schema qualifier.
    :return: a ``CREATE VIEW IF NOT EXISTS ... AS ...;`` statement.
    """
    # Normalize whitespace and avoid double semicolons in output
    cleaned_sql = view_select_sql.strip().rstrip(";")

    # Double embedded double quotes so the name always stays a single quoted
    # identifier instead of terminating the quoting early.
    quoted_view_name = '"' + view_name.replace('"', '""') + '"'

    try:
        tree = parse_one(cleaned_sql, read="mysql")
    except (ParseError, ValueError, Exception):  # pylint: disable=W0718
        # Deliberately broad: any parser failure falls back to a basic
        # CREATE VIEW that reuses the original SELECT untouched.
        return f"CREATE VIEW IF NOT EXISTS {quoted_view_name} AS\n{cleaned_sql};"

    if not keep_schema and schema_name:
        # Remove schema qualifiers that match schema_name
        for tbl in tree.find_all(exp.Table):
            db = tbl.args.get("db")
            if db and db.name.strip('`"') == schema_name:
                tbl.set("db", None)

    sqlite_select = tree.sql(dialect="sqlite")
    return f"CREATE VIEW IF NOT EXISTS {quoted_view_name} AS\n{sqlite_select};"
689+
690+
def _build_create_view_sql(self, view_name: str) -> str:
691+
"""Build a CREATE VIEW statement for SQLite from a MySQL VIEW definition."""
692+
# Try to obtain the view definition from information_schema.VIEWS
693+
definition: t.Optional[str] = None
694+
try:
695+
self._mysql_cur_dict.execute(
696+
"""
697+
SELECT VIEW_DEFINITION AS `definition`
698+
FROM information_schema.VIEWS
699+
WHERE TABLE_SCHEMA = %s
700+
AND TABLE_NAME = %s
701+
""",
702+
(self._mysql_database, view_name),
703+
)
704+
row: t.Optional[t.Dict[str, RowItemType]] = self._mysql_cur_dict.fetchone()
705+
if row is not None and row.get("definition") is not None:
706+
val = row["definition"]
707+
if isinstance(val, bytes):
708+
try:
709+
definition = val.decode()
710+
except UnicodeDecodeError:
711+
definition = str(val)
712+
else:
713+
definition = t.cast(str, val)
714+
except mysql.connector.Error:
715+
# Fall back to SHOW CREATE VIEW below
716+
definition = None
717+
718+
if not definition:
719+
# Fallback: use SHOW CREATE VIEW and extract the SELECT part
720+
try:
721+
# Escape backticks in the MySQL view name for safe interpolation
722+
safe_view_name = view_name.replace("`", "``")
723+
self._mysql_cur.execute(f"SHOW CREATE VIEW `{safe_view_name}`")
724+
res = self._mysql_cur.fetchone()
725+
if res and len(res) >= 2:
726+
create_stmt = res[1]
727+
if isinstance(create_stmt, bytes):
728+
try:
729+
create_stmt_str = create_stmt.decode()
730+
except UnicodeDecodeError:
731+
create_stmt_str = str(create_stmt)
732+
else:
733+
create_stmt_str = t.cast(str, create_stmt)
734+
# Extract the SELECT ... part after AS (supporting newlines)
735+
m = re.search(r"\bAS\b\s*(.*)$", create_stmt_str, re.IGNORECASE | re.DOTALL)
736+
if m:
737+
definition = m.group(1).strip().rstrip(";")
738+
else:
739+
# As a last resort, try to use the full statement replacing the prefix
740+
# Not ideal, but better than failing outright
741+
idx = create_stmt_str.upper().find(" AS ")
742+
if idx != -1:
743+
definition = create_stmt_str[idx + 4 :].strip().rstrip(";")
744+
except mysql.connector.Error:
745+
pass
746+
747+
if not definition:
748+
raise sqlite3.Error(f"Unable to fetch definition for MySQL view '{view_name}'")
749+
750+
return self._mysql_viewdef_to_sqlite(
751+
view_name=view_name,
752+
view_select_sql=definition,
753+
schema_name=self._mysql_database,
754+
)
755+
756+
def _create_view(self, view_name: str, attempting_reconnect: bool = False) -> None:
    """Create ``view_name`` in the SQLite database as a native view.

    Builds the CREATE VIEW statement via ``_build_create_view_sql``, executes
    it on the SQLite cursor and commits. If MySQL reports ``CR_SERVER_LOST``
    the whole operation is retried once after reconnecting.

    :param view_name: name of the MySQL view to recreate.
    :param attempting_reconnect: True on the retry; the MySQL connection is
        re-established first and a further connection loss is not retried.
    :raises mysql.connector.Error: on MySQL failures, including a connection
        loss during the retry.
    :raises sqlite3.Error: when building or creating the view fails.
    """
    try:
        if attempting_reconnect:
            self._mysql.reconnect()
        create_view_sql = self._build_create_view_sql(view_name)
        self._sqlite_cur.execute(create_view_sql)
        self._sqlite.commit()
    except mysql.connector.Error as mysql_err:
        if mysql_err.errno != errorcode.CR_SERVER_LOST:
            # Any MySQL failure other than a dropped connection is fatal.
            self._logger.error(
                "MySQL failed reading view definition from view %s: %s",
                view_name,
                mysql_err,
            )
            raise
        if attempting_reconnect:
            # Already retried once; give up.
            self._logger.warning("Connection to MySQL server lost.\nReconnection attempt aborted.")
            raise
        self._logger.warning("Connection to MySQL server lost.\nAttempting to reconnect.")
        self._create_view(view_name, True)
    except sqlite3.Error as sqlite_err:
        self._logger.error("SQLite failed creating view %s: %s", view_name, sqlite_err)
        raise
781+
653782
def _transfer_table_data(
654783
self, table_name: str, sql: str, total_records: int = 0, attempting_reconnect: bool = False
655784
) -> None:
@@ -693,6 +822,7 @@ def _transfer_table_data(
693822
total_records=total_records,
694823
attempting_reconnect=True,
695824
)
825+
return
696826
else:
697827
self._logger.warning("Connection to MySQL server lost.\nReconnection attempt aborted.")
698828
raise
@@ -720,7 +850,7 @@ def transfer(self) -> None:
720850

721851
self._mysql_cur_prepared.execute(
722852
"""
723-
SELECT TABLE_NAME
853+
SELECT TABLE_NAME, TABLE_TYPE
724854
FROM information_schema.TABLES
725855
WHERE TABLE_SCHEMA = SCHEMA()
726856
AND TABLE_NAME {exclude} IN ({placeholders})
@@ -730,25 +860,49 @@ def transfer(self) -> None:
730860
),
731861
specific_tables,
732862
)
733-
tables: t.Iterable[RowItemType] = (row[0] for row in self._mysql_cur_prepared.fetchall())
863+
tables: t.Iterable[t.Tuple[str, str]] = (
864+
(
865+
str(row[0].decode() if isinstance(row[0], (bytes, bytearray)) else row[0]),
866+
str(row[1].decode() if isinstance(row[1], (bytes, bytearray)) else row[1]),
867+
)
868+
for row in self._mysql_cur_prepared.fetchall()
869+
)
734870
else:
735871
# transfer all tables
736872
self._mysql_cur.execute(
737873
"""
738-
SELECT TABLE_NAME
874+
SELECT TABLE_NAME, TABLE_TYPE
739875
FROM information_schema.TABLES
740876
WHERE TABLE_SCHEMA = SCHEMA()
741877
"""
742878
)
743-
tables = (row[0].decode() for row in self._mysql_cur.fetchall()) # type: ignore[union-attr]
879+
880+
def _coerce_row(row: t.Any) -> t.Tuple[str, str]:
881+
try:
882+
# Row like (name, type)
883+
name = row[0].decode() if isinstance(row[0], (bytes, bytearray)) else row[0]
884+
ttype = (
885+
row[1].decode()
886+
if (isinstance(row, (list, tuple)) and len(row) > 1 and isinstance(row[1], (bytes, bytearray)))
887+
else (row[1] if (isinstance(row, (list, tuple)) and len(row) > 1) else "BASE TABLE")
888+
)
889+
return str(name), str(ttype)
890+
except (TypeError, IndexError, UnicodeDecodeError):
891+
# Fallback: treat as a single value name when row is not a 2-tuple or decoding fails
892+
name = row.decode() if isinstance(row, (bytes, bytearray)) else str(row)
893+
return name, "BASE TABLE"
894+
895+
tables = (_coerce_row(row) for row in self._mysql_cur.fetchall())
744896

745897
try:
746898
# turn off foreign key checking in SQLite while transferring data
747899
self._sqlite_cur.execute("PRAGMA foreign_keys=OFF")
748900

749-
for table_name in tables:
901+
for table_name, table_type in tables:
750902
if isinstance(table_name, bytes):
751903
table_name = table_name.decode()
904+
if isinstance(table_type, bytes):
905+
table_type = table_type.decode()
752906

753907
self._logger.info(
754908
"%s%sTransferring table %s",
@@ -761,10 +915,13 @@ def transfer(self) -> None:
761915
self._current_chunk_number = 0
762916

763917
if not self._without_tables:
764-
# create the table
765-
self._create_table(table_name) # type: ignore[arg-type]
918+
# create the table or view
919+
if table_type == "VIEW" and self._views_as_views:
920+
self._create_view(table_name) # type: ignore[arg-type]
921+
else:
922+
self._create_table(table_name) # type: ignore[arg-type]
766923

767-
if not self._without_data:
924+
if not self._without_data and not (table_type == "VIEW" and self._views_as_views):
768925
# get the size of the data
769926
if self._limit_rows > 0:
770927
# limit to the requested number of rows

src/mysql_to_sqlite3/types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class MySQLtoSQLiteParams(TypedDict):
4444
without_tables: t.Optional[bool]
4545
without_data: t.Optional[bool]
4646
without_foreign_keys: t.Optional[bool]
47+
views_as_views: t.Optional[bool]
4748

4849

4950
class MySQLtoSQLiteAttributes:
@@ -81,6 +82,7 @@ class MySQLtoSQLiteAttributes:
8182
_vacuum: bool
8283
_without_data: bool
8384
_without_foreign_keys: bool
85+
_views_as_views: bool
8486
# Tracking of SQLite index names and counters to ensure uniqueness when prefixing is disabled
8587
_seen_sqlite_index_names: t.Set[str]
8688
_sqlite_index_name_counters: t.Dict[str, int]

tests/unit/test_cli_views_flag.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import typing as t
2+
3+
import pytest
4+
from click.testing import CliRunner
5+
6+
from mysql_to_sqlite3.cli import cli as mysql2sqlite
7+
8+
9+
class TestCLIViewsFlag:
    """CLI tests for the option that materializes MySQL views as SQLite tables."""

    @staticmethod
    def _invoke_with_flag(
        monkeypatch: pytest.MonkeyPatch, flag: str
    ) -> t.Tuple[t.Any, t.Dict[str, t.Any]]:
        """Run the CLI with ``flag`` against a stub converter.

        Returns the click invocation result and the keyword arguments the
        stub converter was constructed with.
        """
        captured: t.Dict[str, t.Any] = {}

        class FakeConverter:
            def __init__(self, **kwargs: t.Any) -> None:
                captured.update(kwargs)

            def transfer(self) -> None:  # pragma: no cover - nothing to do
                return None

        # Patch the converter used by the CLI
        monkeypatch.setattr("mysql_to_sqlite3.cli.MySQLtoSQLite", FakeConverter)

        cli_args = ["-f", "out.sqlite3", "-d", "db", "-u", "user", flag]
        outcome = CliRunner().invoke(mysql2sqlite, cli_args)
        return outcome, captured

    def test_mysql_views_as_tables_flag_is_threaded(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Ensure --mysql-views-as-tables reaches MySQLtoSQLite as views_as_views=False (legacy materialization)."""
        outcome, captured = self._invoke_with_flag(monkeypatch, "--mysql-views-as-tables")
        assert outcome.exit_code == 0
        assert captured.get("views_as_views") is False

    def test_mysql_views_as_tables_short_flag_is_threaded(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Ensure -T (short for --mysql-views-as-tables) reaches MySQLtoSQLite as views_as_views=False."""
        outcome, captured = self._invoke_with_flag(monkeypatch, "-T")
        assert outcome.exit_code == 0
        assert captured.get("views_as_views") is False

0 commit comments

Comments
 (0)