@@ -5282,6 +5282,7 @@ def to_sql(
         name: str,
         con: Union[str, "sqlalchemy.engine.Connection", "sqlalchemy.engine.Engine", "sqlite3.Connection"],
         batch_size: Optional[int] = None,
+        num_proc: Optional[int] = None,
         **sql_writer_kwargs,
     ) -> int:
         """Exports the dataset to a SQL database.
@@ -5294,6 +5295,11 @@ def to_sql(
             batch_size (`int`, *optional*):
                 Size of the batch to load in memory and write at once.
                 Defaults to `datasets.config.DEFAULT_MAX_BATCH_SIZE`.
+            num_proc (`int`, *optional*):
+                Number of processes for multiprocessing. By default, it doesn't
+                use multiprocessing. `batch_size` in this case defaults to
+                `datasets.config.DEFAULT_MAX_BATCH_SIZE`, but feel free to make it
+                5x or 10x the default value if you have sufficient compute power.
             **sql_writer_kwargs (additional keyword arguments):
                 Parameters to pass to pandas's [`pandas.DataFrame.to_sql`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html).

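For reference, a minimal usage sketch assuming this patch is applied; the table names, SQLite URI, and toy data are illustrative rather than part of the change:

    from datasets import Dataset

    ds = Dataset.from_dict({"id": [1, 2, 3], "text": ["a", "b", "c"]})

    # Default behavior, unchanged: a single-process write.
    ds.to_sql("data_single", "sqlite:///example.db")

    # With the new parameter: batches are handled by 4 worker processes.
    # A larger batch_size can pay off when sufficient compute is available.
    ds.to_sql("data_multi", "sqlite:///example.db", batch_size=10_000, num_proc=4)
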
@@ -5324,7 +5330,7 @@ def to_sql(
         # Dynamic import to avoid circular dependency
         from .io.sql import SqlDatasetWriter

-        return SqlDatasetWriter(self, name, con, batch_size=batch_size, **sql_writer_kwargs).write()
+        return SqlDatasetWriter(self, name, con, batch_size=batch_size, num_proc=num_proc, **sql_writer_kwargs).write()

     def _estimate_nbytes(self) -> int:
         dataset_nbytes = self.data.nbytes
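The call-site change above simply forwards `num_proc` to `SqlDatasetWriter`. As a rough sketch of the pattern this enables, and explicitly not the actual `SqlDatasetWriter` implementation, worker processes can prepare batches in parallel while a single connection performs the writes; all names below are made up:

    import multiprocessing
    import sqlite3

    import pandas as pd


    def _prepare_batch(rows: dict) -> pd.DataFrame:
        # CPU-bound preparation (casting, encoding, ...) parallelizes well.
        return pd.DataFrame(rows)


    if __name__ == "__main__":
        batches = [
            {"id": [1, 2], "text": ["a", "b"]},
            {"id": [3, 4], "text": ["c", "d"]},
        ]
        with multiprocessing.Pool(processes=2) as pool:
            frames = pool.map(_prepare_batch, batches)

        # A single writer avoids concurrent-write contention on SQLite.
        con = sqlite3.connect("example.db")
        for df in frames:
            df.to_sql("data_multi", con, if_exists="append", index=False)
        con.close()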