Skip to content

Commit 0b41987

Browse files
committed
CFR: Improve importing data re. type mapping without NumPy
Using orjson instead of Polars does not introduce NumPy types.
1 parent ab6d40b commit 0b41987

File tree

3 files changed, with 17 additions and 7 deletions.

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
- CFR: Improved log output
66
- CFR: Fixed double quoting of table name. Thanks, @karynzv.
77
- CFR: When importing, use `replace` policy instead of `append`
8+
- CFR: Improved importing data re. type mapping without NumPy
89

910
## 2025/07/01 v0.0.37
1011
- Settings: Fixed comparison of `0s` vs `0ms`. Thanks, @hlcianfagna.

cratedb_toolkit/cfr/systable.py

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,11 @@
2323
import typing as t
2424
from pathlib import Path
2525

26+
import orjsonl
27+
import pandas as pd
28+
from sqlalchemy_cratedb import insert_bulk
2629
from tqdm.contrib.logging import logging_redirect_tqdm
30+
2731
if t.TYPE_CHECKING:
2832
import polars as pl
2933

@@ -247,20 +251,25 @@ def _load(self, path_schema: Path, path_data: Path):
247251

248252
# Load data.
249253
try:
250-
df: "pl.DataFrame" = self.load_table(path_table_data)
251-
df.write_database(table_name=tablename_restored, connection=self.dburi, if_table_exists="replace")
254+
df: "pd.DataFrame" = pd.DataFrame.from_records(self.load_table(path_table_data))
255+
df.to_sql(
256+
name=tablename_restored,
257+
con=self.adapter.engine,
258+
index=False,
259+
if_exists="replace",
260+
method=insert_bulk,
261+
)
252262
except Exception as ex:
253263
error_logger(self.debug)(f"Importing table failed: {tablename}. Reason: {ex}")
254264

255265
logger.info(f"Successfully imported {table_count} system tables")
256-
# df.to_pandas().to_sql(name=tablename, con=self.adapter.engine, if_exists="append", index=False) # noqa: ERA001, E501
257266

258-
def load_table(self, path: Path) -> "pl.DataFrame":
267+
def load_table(self, path: Path) -> t.List:
259268
import polars as pl
260269

261270
if path.suffix in [".jsonl"]:
262-
return pl.read_ndjson(path)
271+
return orjsonl.load(path)
263272
elif path.suffix in [".parquet", ".pq"]:
264-
return pl.read_parquet(path)
273+
return pl.read_parquet(path).to_pandas().to_dict("records")
265274
else:
266275
raise NotImplementedError(f"Input format not implemented: {path.suffix}")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ dependencies = [
110110
"python-slugify<9",
111111
"pyyaml<7",
112112
"requests>=2.28,<3",
113-
"sqlalchemy-cratedb>=0.41",
113+
"sqlalchemy-cratedb>=0.42.0.dev2",
114114
"sqlparse<0.6",
115115
"tqdm<5",
116116
"typing-extensions<5; python_version<='3.7'",

0 commit comments

Comments (0)