This repository was archived by the owner on Aug 28, 2025. It is now read-only.

Commit 1eb64a1

Fine-Tuning Scheduler Tutorial Update for Lightning/PyTorch 2.5.0 (#372)
1 parent a7d9471 commit 1eb64a1

File tree

2 files changed: +7 -2 lines changed


lightning_examples/finetuning-scheduler/finetuning-scheduler.py

Lines changed: 6 additions & 1 deletion
@@ -147,6 +147,8 @@
 #
 # - ``ddp`` (and aliases ``ddp_find_unused_parameters_false``, ``ddp_find_unused_parameters_true``, ``ddp_spawn``, ``ddp_fork``, ``ddp_notebook``)
 # - ``fsdp`` (and alias ``fsdp_cpu_offload``)
+# - **NEW**: ``ModelParallelStrategy``
+# - [See this example](https://finetuning-scheduler.readthedocs.io/en/stable/distributed/model_parallel_scheduled_fine_tuning.html) using FTS with PyTorch's composable distributed (e.g. ``fully_shard``, ``checkpoint``) and Tensor Parallelism (TP) APIs
 #
 # Custom or officially unsupported strategies can be used by setting [FinetuningScheduler.allow_untested](https://finetuning-scheduler.readthedocs.io/en/stable/api/finetuning_scheduler.fts.html?highlight=allow_untested#finetuning_scheduler.fts.FinetuningScheduler.params.allow_untested) to ``True``.
 # Note that most currently unsupported strategies are so because they require varying degrees of modification to be compatible. For example, ``deepspeed`` will require a [StrategyAdapter](https://finetuning-scheduler.readthedocs.io/en/stable/api/finetuning_scheduler.strategy_adapters.html#finetuning_scheduler.strategy_adapters.StrategyAdapter) to be written (similar to the one for ``FSDP``, [FSDPStrategyAdapter](https://finetuning-scheduler.readthedocs.io/en/stable/api/finetuning_scheduler.strategy_adapters.html#finetuning_scheduler.strategy_adapters.FSDPStrategyAdapter)) before support can be added (PRs welcome!),
@@ -260,7 +262,10 @@ def __init__(
         self.save_hyperparameters()
         os.environ["TOKENIZERS_PARALLELISM"] = "true" if self.hparams.tokenizers_parallelism else "false"
         self.tokenizer = AutoTokenizer.from_pretrained(
-            self.hparams.model_name_or_path, use_fast=True, local_files_only=False
+            self.hparams.model_name_or_path,
+            use_fast=True,
+            local_files_only=False,
+            clean_up_tokenization_spaces=True,
         )

     def prepare_data(self):
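For reference, the updated tokenizer call reads as follows when lifted out of the LightningModule; a standalone sketch in which the checkpoint name is illustrative, with ``clean_up_tokenization_spaces`` now passed explicitly rather than relying on the library default:

# Standalone sketch (checkpoint name is illustrative only).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/deberta-v3-base",
    use_fast=True,
    local_files_only=False,
    clean_up_tokenization_spaces=True,  # set explicitly instead of relying on the default
)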
Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 datasets >=2.17.0 # to allow explicitly setting `trust_remote_code`
-finetuning-scheduler[examples] <=2.4.0
+finetuning-scheduler[examples] <=2.5.0
