Add FSDP2 example (#6411)

slin000111 · web-flow · commit 376640797e13 · 2025-11-07T11:54:27.000+08:00
* update faq

* Fixed the inconsistencies between the Chinese and English FAQ documentation.

* Update link to sequence parallel example

* support llava-onevision-1.5

* update model list

* update model list

* add test

* Update test_vision.py

* add fsdp2

* modify fsdp2 config
diff --git a/examples/train/multi-gpu/fsdp2_lora/fsdp2.json b/examples/train/multi-gpu/fsdp2_lora/fsdp2.json
@@ -0,0 +1,25 @@
+{
+  "compute_environment": "LOCAL_MACHINE",
+  "debug": false,
+  "distributed_type": "FSDP",
+  "downcast_bf16": "no",
+  "fsdp_config": {
+    "fsdp_auto_wrap_policy": "TRANSFORMER_BASED_WRAP",
+    "fsdp_cpu_ram_efficient_loading": true,
+    "fsdp_reshard_after_forward": true,
+    "fsdp_state_dict_type": "FULL_STATE_DICT",
+    "fsdp_activation_checkpointing": true,
+    "fsdp_version": 2
+  },
+  "machine_rank": 0,
+  "main_training_function": "main",
+  "mixed_precision": "bf16",
+  "num_machines": 1,
+  "num_processes": 2,
+  "rdzv_backend": "static",
+  "same_network": true,
+  "tpu_env": [],
+  "tpu_use_cluster": false,
+  "tpu_use_sudo": false,
+  "use_cpu": false
+}
diff --git a/examples/train/multi-gpu/fsdp2_lora/train.sh b/examples/train/multi-gpu/fsdp2_lora/train.sh
@@ -0,0 +1,31 @@
+# 14.7GiB * 2
+nproc_per_node=2
+
+CUDA_VISIBLE_DEVICES=0,1 \
+accelerate launch --config_file "./examples/train/multi-gpu/fsdp2_lora/fsdp2.json" \
+    swift/cli/sft.py \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type lora \
+    --dataset 'swift/self-cognition#1000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --gradient_checkpointing false \
+    --weight_decay 0.1 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot