From 6553d97d73730b50cf60a5bd40f07ccc584611aa Mon Sep 17 00:00:00 2001 From: "Shin, JaeHyeon" Date: Thu, 17 Jul 2025 21:28:39 +0900 Subject: [PATCH 1/3] docs: ko: how_to_hack_models.md --- docs/source/ko/_toctree.yml | 2 + docs/source/ko/how_to_hack_models.md | 152 +++++++++++++++++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 docs/source/ko/how_to_hack_models.md diff --git a/docs/source/ko/_toctree.yml b/docs/source/ko/_toctree.yml index 75632c1f59a5..82e760d85524 100644 --- a/docs/source/ko/_toctree.yml +++ b/docs/source/ko/_toctree.yml @@ -21,6 +21,8 @@ title: 🤗 Accelerate로 분산 학습 구성하기 - local: peft title: 🤗 PEFT로 어댑터 로드 및 학습하기 + - local: how_to_hack_models + title: 모델 구성 요소 맞춤 설정하기 - local: model_sharing title: 만든 모델 공유하기 - local: llm_tutorial diff --git a/docs/source/ko/how_to_hack_models.md b/docs/source/ko/how_to_hack_models.md new file mode 100644 index 000000000000..0a3c38a3e14f --- /dev/null +++ b/docs/source/ko/how_to_hack_models.md @@ -0,0 +1,152 @@ + + +# Customizing model components + +Another way to customize a model is to modify their components, rather than writing a new model entirely, allowing you to tailor a model to your specific use case. For example, you can add new layers or optimize the attention mechanism of an architecture. Customizations are applied directly to a Transformers model so that you can continue to use features such as [`Trainer`], [`PreTrainedModel`], and the [PEFT](https://huggingface.co/docs/peft/en/index) library. + +This guide will show you how to customize a models attention mechanism in order to apply [Low-Rank Adaptation (LoRA)](https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora) to it. 
+ +> [!TIP] +> The [clear_import_cache](https://github.com/huggingface/transformers/blob/9985d06add07a4cc691dc54a7e34f54205c04d40/src/transformers/utils/import_utils.py#L2286) utility is very useful when you're iteratively modifying and developing model code. It removes all cached Transformers modules and allows Python to reload the modified code without constantly restarting your environment. +> +> ```py +> from transformers import AutoModel +> from transformers.utils.import_utils import clear_import_cache +> +> model = AutoModel.from_pretrained("bert-base-uncased") +> # modifications to model code +> # clear cache to reload modified code +> clear_import_cache() +> # re-import to use updated code +> model = AutoModel.from_pretrained("bert-base-uncased") +> ``` + +## Attention class + +[Segment Anything](./model_doc/sam) is an image segmentation model, and it combines the query-key-value (`qkv`) projection in its attention mechanisms. To reduce the number of trainable parameters and computational overhead, you can apply LoRA to the `qkv` projection. This requires splitting the `qkv` projection so that you can separately target the `q` and `v` with LoRA. + +1. Create a custom attention class, `SamVisionAttentionSplit`, by subclassing the original `SamVisionAttention` class. In the `__init__`, delete the combined `qkv` and create a separate linear layer for `q`, `k` and `v`. 
+ +```py +import torch +import torch.nn as nn +from transformers.models.sam.modeling_sam import SamVisionAttention + +class SamVisionAttentionSplit(SamVisionAttention, nn.Module): + def __init__(self, config, window_size): + super().__init__(config, window_size) + # remove combined qkv + del self.qkv + # separate q, k, v projections + self.q = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias) + self.k = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias) + self.v = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias) + self._register_load_state_dict_pre_hook(self.split_q_k_v_load_hook) +``` + +2. The `_split_qkv_load_hook` function splits the pretrained `qkv` weights into separate `q`, `k`, and `v` weights when loading the model to ensure compatibility with any pretrained model. + +```py + def split_q_k_v_load_hook(self, state_dict, prefix, *args): + keys_to_delete = [] + for key in list(state_dict.keys()): + if "qkv." in key: + # split q, k, v from the combined projection + q, k, v = state_dict[key].chunk(3, dim=0) + # replace with individual q, k, v projections + state_dict[key.replace("qkv.", "q.")] = q + state_dict[key.replace("qkv.", "k.")] = k + state_dict[key.replace("qkv.", "v.")] = v + # mark the old qkv key for deletion + keys_to_delete.append(key) + + # remove old qkv keys + for key in keys_to_delete: + del state_dict[key] +``` + +3. In the `forward` pass, `q`, `k`, and `v` are computed separately while the rest of the attention mechanism remains the same. 
+ +```py + def forward(self, hidden_states: torch.Tensor, output_attentions=False) -> torch.Tensor: + batch_size, height, width, _ = hidden_states.shape + qkv_shapes = (batch_size * self.num_attention_heads, height * width, -1) + query = self.q(hidden_states).reshape((batch_size, height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes) + key = self.k(hidden_states).reshape((batch_size, height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes) + value = self.v(hidden_states).reshape((batch_size, height * width,self.num_attention_heads, -1)).permute(0,2,1,3).reshape(qkv_shapes) + + attn_weights = (query * self.scale) @ key.transpose(-2, -1) + + attn_weights = torch.nn.functional.softmax(attn_weights, dtype=torch.float32, dim=-1).to(query.dtype) + attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) + attn_output = (attn_probs @ value).reshape(batch_size, self.num_attention_heads, height, width, -1) + attn_output = attn_output.permute(0, 2, 3, 1, 4).reshape(batch_size, height, width, -1) + attn_output = self.proj(attn_output) + + if output_attentions: + outputs = (attn_output, attn_weights) + else: + outputs = (attn_output, None) + return outputs +``` + +Assign the custom `SamVisionAttentionSplit` class to the original models `SamVisionAttention` module to replace it. All instances of `SamVisionAttention` in the model is replaced with the split attention version. + +Load the model with [`~PreTrainedModel.from_pretrained`]. 
+ +```py +from transformers import SamModel + +# load the pretrained SAM model +model = SamModel.from_pretrained("facebook/sam-vit-base") + +# replace the attention class in the vision_encoder module +for layer in model.vision_encoder.layers: + if hasattr(layer, "attn"): + layer.attn = SamVisionAttentionSplit(model.config.vision_config, model.config.vision_config.window_size) +``` + +## LoRA + +With separate `q`, `k`, and `v` projections, apply LoRA to `q` and `v`. + +Create a [LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig) and specify the rank `r`, `lora_alpha`, `lora_dropout`, `task_type`, and most importantly, the modules to target. + +```py +from peft import LoraConfig, get_peft_model + +config = LoraConfig( + r=16, + lora_alpha=32, + # apply LoRA to q and v + target_modules=["q", "v"], + lora_dropout=0.1, + task_type="FEATURE_EXTRACTION" +) +``` + +Pass the model and [LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig) to [get_peft_model](https://huggingface.co/docs/peft/package_reference/peft_model#peft.get_peft_model) to apply LoRA to the model. + +```py +model = get_peft_model(model, config) +``` + +Call [print_trainable_parameters](https://huggingface.co/docs/peft/package_reference/peft_model#peft.PeftMixedModel.print_trainable_parameters) to view the number of parameters you're training as a result versus the total number of parameters. 
+ +```py +model.print_trainable_parameters() +"trainable params: 589,824 || all params: 94,274,096 || trainable%: 0.6256" +``` \ No newline at end of file From ef9848c63d78d0ed2c614269cd9e04e32067abb2 Mon Sep 17 00:00:00 2001 From: "Shin, JaeHyeon" Date: Thu, 17 Jul 2025 21:32:20 +0900 Subject: [PATCH 2/3] feat: nmt draft --- docs/source/ko/how_to_hack_models.md | 60 ++++++++++++++-------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/docs/source/ko/how_to_hack_models.md b/docs/source/ko/how_to_hack_models.md index 0a3c38a3e14f..bd5c0f2dcf01 100644 --- a/docs/source/ko/how_to_hack_models.md +++ b/docs/source/ko/how_to_hack_models.md @@ -13,32 +13,32 @@ rendered properly in your Markdown viewer. --> -# Customizing model components +# 모델 구성 요소 맞춤 설정하기\[\[customizing-model-components]] -Another way to customize a model is to modify their components, rather than writing a new model entirely, allowing you to tailor a model to your specific use case. For example, you can add new layers or optimize the attention mechanism of an architecture. Customizations are applied directly to a Transformers model so that you can continue to use features such as [`Trainer`], [`PreTrainedModel`], and the [PEFT](https://huggingface.co/docs/peft/en/index) library. +모델을 완전히 새로 작성하는 대신 구성 요소를 수정하여 모델을 커스터마이징하는 방법이 있습니다. 이를 통해 특정 사용 사례에 맞게 모델을 조정할 수 있습니다. 예를 들어, 새로운 레이어를 추가하거나 아키텍처의 어텐션 메커니즘을 최적화할 수 있습니다. 이러한 커스터마이징은 `Transformers` 모델에 직접 적용되므로, \[`Trainer`], \[`PreTrainedModel`] 및 [PEFT](https://huggingface.co/docs/peft/en/index) 라이브러리와 같은 기능을 계속 사용할 수 있습니다. 
-This guide will show you how to customize a models attention mechanism in order to apply [Low-Rank Adaptation (LoRA)](https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora) to it. +이 가이드에서는 모델의 어텐션 메커니즘을 커스터마이징하여 [Low-Rank Adaptation (LoRA)](https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora)를 적용하는 방법을 설명합니다. -> [!TIP] -> The [clear_import_cache](https://github.com/huggingface/transformers/blob/9985d06add07a4cc691dc54a7e34f54205c04d40/src/transformers/utils/import_utils.py#L2286) utility is very useful when you're iteratively modifying and developing model code. It removes all cached Transformers modules and allows Python to reload the modified code without constantly restarting your environment. +> \[!TIP] +> 모델 코드를 반복적으로 수정하고 개발할 때 [clear\_import\_cache](https://github.com/huggingface/transformers/blob/9985d06add07a4cc691dc54a7e34f54205c04d40/src/transformers/utils/import_utils.py#L2286) 유틸리티가 매우 유용합니다. 이 기능은 캐시된 모든 `Transformers` 모듈을 제거하여 Python이 환경을 재시작하지 않고도 수정된 코드를 다시 로드할 수 있도록 합니다. > > ```py > from transformers import AutoModel > from transformers.utils.import_utils import clear_import_cache > > model = AutoModel.from_pretrained("bert-base-uncased") -> # modifications to model code -> # clear cache to reload modified code +> # 모델 코드 수정 +> # 캐시를 지워 수정된 코드를 다시 로드 > clear_import_cache() -> # re-import to use updated code +> # 업데이트된 코드를 사용하기 위해 다시 임포트 > model = AutoModel.from_pretrained("bert-base-uncased") > ``` -## Attention class +## 어텐션 클래스\[\[attention-class]] -[Segment Anything](./model_doc/sam) is an image segmentation model, and it combines the query-key-value (`qkv`) projection in its attention mechanisms. 
To reduce the number of trainable parameters and computational overhead, you can apply LoRA to the `qkv` projection. This requires splitting the `qkv` projection so that you can separately target the `q` and `v` with LoRA. +[Segment Anything](./model_doc/sam)은 이미지 분할 모델로, 어텐션 메커니즘에서 query-key-value(`qkv`) 프로젝션을 결합합니다. 학습 가능한 파라미터 수와 연산 부담을 줄이기 위해 `qkv` 프로젝션에 LoRA를 적용할 수 있습니다. 이를 위해서는 `qkv` 프로젝션을 분리하여 `q`와 `v`에 LoRA를 개별적으로 적용해야 합니다. -1. Create a custom attention class, `SamVisionAttentionSplit`, by subclassing the original `SamVisionAttention` class. In the `__init__`, delete the combined `qkv` and create a separate linear layer for `q`, `k` and `v`. +1. 원래의 `SamVisionAttention` 클래스를 상속하여 `SamVisionAttentionSplit`이라는 커스텀 어텐션 클래스를 만듭니다. `__init__`에서 결합된 `qkv`를 삭제하고, `q`, `k`, `v`를 위한 개별 선형 레이어를 생성합니다. ```py import torch @@ -48,37 +48,37 @@ from transformers.models.sam.modeling_sam import SamVisionAttention class SamVisionAttentionSplit(SamVisionAttention, nn.Module): def __init__(self, config, window_size): super().__init__(config, window_size) - # remove combined qkv + # 결합된 qkv 제거 del self.qkv - # separate q, k, v projections + # q, k, v 개별 프로젝션 생성 self.q = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias) self.k = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias) self.v = nn.Linear(config.hidden_size, config.hidden_size, bias=config.qkv_bias) self._register_load_state_dict_pre_hook(self.split_q_k_v_load_hook) ``` -2. The `_split_qkv_load_hook` function splits the pretrained `qkv` weights into separate `q`, `k`, and `v` weights when loading the model to ensure compatibility with any pretrained model. +2. 
`_split_qkv_load_hook` 함수는 사전 학습된 `qkv` 가중치를 모델 로딩 시 `q`, `k`, `v`로 분리하여 기존 사전 학습 모델과의 호환성을 보장합니다. ```py def split_q_k_v_load_hook(self, state_dict, prefix, *args): keys_to_delete = [] for key in list(state_dict.keys()): if "qkv." in key: - # split q, k, v from the combined projection + # 결합된 프로젝션에서 q, k, v 분리 q, k, v = state_dict[key].chunk(3, dim=0) - # replace with individual q, k, v projections + # 개별 q, k, v 프로젝션으로 대체 state_dict[key.replace("qkv.", "q.")] = q state_dict[key.replace("qkv.", "k.")] = k state_dict[key.replace("qkv.", "v.")] = v - # mark the old qkv key for deletion + # 기존 qkv 키 삭제 마크 keys_to_delete.append(key) - # remove old qkv keys + # 기존 qkv 키 제거 for key in keys_to_delete: del state_dict[key] ``` -3. In the `forward` pass, `q`, `k`, and `v` are computed separately while the rest of the attention mechanism remains the same. +3. `forward` 단계에서 `q`, `k`, `v`는 개별적으로 계산되며, 어텐션 메커니즘의 나머지 부분은 동일하게 유지됩니다. ```py def forward(self, hidden_states: torch.Tensor, output_attentions=False) -> torch.Tensor: @@ -103,27 +103,27 @@ class SamVisionAttentionSplit(SamVisionAttention, nn.Module): return outputs ``` -Assign the custom `SamVisionAttentionSplit` class to the original models `SamVisionAttention` module to replace it. All instances of `SamVisionAttention` in the model is replaced with the split attention version. +커스텀 `SamVisionAttentionSplit` 클래스를 원본 모델의 `SamVisionAttention` 모듈에 할당하여 교체합니다. 모델 내 모든 `SamVisionAttention` 인스턴스는 분리된 어텐션 버전으로 대체됩니다. -Load the model with [`~PreTrainedModel.from_pretrained`]. +\[`~PreTrainedModel.from_pretrained`]로 모델을 로드하세요. 
```py from transformers import SamModel -# load the pretrained SAM model +# 사전 학습된 SAM 모델 로드 model = SamModel.from_pretrained("facebook/sam-vit-base") -# replace the attention class in the vision_encoder module +# vision_encoder 모듈에서 어텐션 클래스 교체 for layer in model.vision_encoder.layers: if hasattr(layer, "attn"): layer.attn = SamVisionAttentionSplit(model.config.vision_config, model.config.vision_config.window_size) ``` -## LoRA +## LoRA\[\[lora]] -With separate `q`, `k`, and `v` projections, apply LoRA to `q` and `v`. +`q`, `k`, `v` 프로젝션을 분리한 후, `q`와 `v`에 LoRA를 적용합니다. -Create a [LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig) and specify the rank `r`, `lora_alpha`, `lora_dropout`, `task_type`, and most importantly, the modules to target. +[LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig)를 생성하고, 순위 `r`, `lora_alpha`, `lora_dropout`, `task_type`을 지정하며, 가장 중요한 `target_modules`를 설정합니다. ```py from peft import LoraConfig, get_peft_model @@ -131,22 +131,22 @@ from peft import LoraConfig, get_peft_model config = LoraConfig( r=16, lora_alpha=32, - # apply LoRA to q and v + # q와 v에 LoRA 적용 target_modules=["q", "v"], lora_dropout=0.1, task_type="FEATURE_EXTRACTION" ) ``` -Pass the model and [LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig) to [get_peft_model](https://huggingface.co/docs/peft/package_reference/peft_model#peft.get_peft_model) to apply LoRA to the model. +모델과 [LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig)를 [get\_peft\_model](https://huggingface.co/docs/peft/package_reference/peft_model#peft.get_peft_model)에 전달하여 모델에 LoRA를 적용합니다. 
```py model = get_peft_model(model, config) ``` -Call [print_trainable_parameters](https://huggingface.co/docs/peft/package_reference/peft_model#peft.PeftMixedModel.print_trainable_parameters) to view the number of parameters you're training as a result versus the total number of parameters. +[print\_trainable\_parameters](https://huggingface.co/docs/peft/package_reference/peft_model#peft.PeftMixedModel.print_trainable_parameters)를 호출하여 훈련되는 파라미터 수와 전체 파라미터 대비 비율을 확인하세요. ```py model.print_trainable_parameters() "trainable params: 589,824 || all params: 94,274,096 || trainable%: 0.6256" -``` \ No newline at end of file +``` From 2db19327e1d9af060836cab6ff874df628d90f7c Mon Sep 17 00:00:00 2001 From: "Shin, JaeHyeon" Date: Sun, 20 Jul 2025 22:42:31 +0900 Subject: [PATCH 3/3] fix: manual edits --- docs/source/ko/how_to_hack_models.md | 38 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/source/ko/how_to_hack_models.md b/docs/source/ko/how_to_hack_models.md index bd5c0f2dcf01..9ef4839335f3 100644 --- a/docs/source/ko/how_to_hack_models.md +++ b/docs/source/ko/how_to_hack_models.md @@ -13,14 +13,14 @@ rendered properly in your Markdown viewer. --> -# 모델 구성 요소 맞춤 설정하기\[\[customizing-model-components]] +# 모델 구성 요소 맞춤 설정하기[[customizing-model-components]] -모델을 완전히 새로 작성하는 대신 구성 요소를 수정하여 모델을 커스터마이징하는 방법이 있습니다. 이를 통해 특정 사용 사례에 맞게 모델을 조정할 수 있습니다. 예를 들어, 새로운 레이어를 추가하거나 아키텍처의 어텐션 메커니즘을 최적화할 수 있습니다. 이러한 커스터마이징은 `Transformers` 모델에 직접 적용되므로, \[`Trainer`], \[`PreTrainedModel`] 및 [PEFT](https://huggingface.co/docs/peft/en/index) 라이브러리와 같은 기능을 계속 사용할 수 있습니다. 
+λͺ¨λΈμ„ μ™„μ „νžˆ μƒˆλ‘œ μž‘μ„±ν•˜λŠ” λŒ€μ‹  ꡬ성 μš”μ†Œλ₯Ό μˆ˜μ •ν•˜μ—¬ λͺ¨λΈμ„ 맞좀 μ„€μ •ν•˜λŠ” 방법이 μžˆμŠ΅λ‹ˆλ‹€. 이 λ°©λ²•μœΌλ‘œ λͺ¨λΈμ„ νŠΉμ • μ‚¬μš© 사둀에 맞게 λͺ¨λΈμ„ μ‘°μ •ν•  수 μžˆμŠ΅λ‹ˆλ‹€. 예λ₯Ό λ“€μ–΄, μƒˆλ‘œμš΄ λ ˆμ΄μ–΄λ₯Ό μΆ”κ°€ν•˜κ±°λ‚˜ μ•„ν‚€ν…μ²˜μ˜ μ–΄ν…μ…˜ λ©”μ»€λ‹ˆμ¦˜μ„ μ΅œμ ν™”ν•  수 μžˆμŠ΅λ‹ˆλ‹€. μ΄λŸ¬ν•œ 맞좀 섀정은 트랜슀포머 λͺ¨λΈμ— 직접 μ μš©λ˜λ―€λ‘œ, [`Trainer`], [`PreTrainedModel`] 및 [PEFT](https://huggingface.co/docs/peft/en/index) λΌμ΄λΈŒλŸ¬λ¦¬μ™€ 같은 κΈ°λŠ₯을 계속 μ‚¬μš©ν•  수 μžˆμŠ΅λ‹ˆλ‹€. -이 κ°€μ΄λ“œμ—μ„œλŠ” λͺ¨λΈμ˜ μ–΄ν…μ…˜ λ©”μ»€λ‹ˆμ¦˜μ„ μ»€μŠ€ν„°λ§ˆμ΄μ§•ν•˜μ—¬ [Low-Rank Adaptation (LoRA)](https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora)λ₯Ό μ μš©ν•˜λŠ” 방법을 μ„€λͺ…ν•©λ‹ˆλ‹€. +이 κ°€μ΄λ“œμ—μ„œλŠ” λͺ¨λΈμ˜ μ–΄ν…μ…˜ λ©”μ»€λ‹ˆμ¦˜μ„ 맞좀 μ„€μ •ν•˜μ—¬ [Low-Rank Adaptation (LoRA)](https://huggingface.co/docs/peft/conceptual_guides/adapter#low-rank-adaptation-lora)λ₯Ό μ μš©ν•˜λŠ” 방법을 μ„€λͺ…ν•©λ‹ˆλ‹€. -> \[!TIP] -> λͺ¨λΈ μ½”λ“œλ₯Ό 반볡적으둜 μˆ˜μ •ν•˜κ³  κ°œλ°œν•  λ•Œ [clear\_import\_cache](https://github.com/huggingface/transformers/blob/9985d06add07a4cc691dc54a7e34f54205c04d40/src/transformers/utils/import_utils.py#L2286) μœ ν‹Έλ¦¬ν‹°κ°€ 맀우 μœ μš©ν•©λ‹ˆλ‹€. 이 κΈ°λŠ₯은 μΊμ‹œλœ λͺ¨λ“  `Transformers` λͺ¨λ“ˆμ„ μ œκ±°ν•˜μ—¬ Python이 ν™˜κ²½μ„ μž¬μ‹œμž‘ν•˜μ§€ μ•Šκ³ λ„ μˆ˜μ •λœ μ½”λ“œλ₯Ό λ‹€μ‹œ λ‘œλ“œν•  수 μžˆλ„λ‘ ν•©λ‹ˆλ‹€. +> [!TIP] +> λͺ¨λΈ μ½”λ“œλ₯Ό 반볡적으둜 μˆ˜μ •ν•˜κ³  κ°œλ°œν•  λ•Œ [clear_import_cache](https://github.com/huggingface/transformers/blob/9985d06add07a4cc691dc54a7e34f54205c04d40/src/transformers/utils/import_utils.py#L2286) μœ ν‹Έλ¦¬ν‹°κ°€ 맀우 μœ μš©ν•©λ‹ˆλ‹€. 이 κΈ°λŠ₯은 μΊμ‹œλœ λͺ¨λ“  트랜슀포머 λͺ¨λ“ˆμ„ μ œκ±°ν•˜μ—¬ Python이 ν™˜κ²½μ„ μž¬μ‹œμž‘ν•˜μ§€ μ•Šκ³ λ„ μˆ˜μ •λœ μ½”λ“œλ₯Ό λ‹€μ‹œ κ°€μ Έμ˜¬ 수 μžˆλ„λ‘ ν•©λ‹ˆλ‹€. > > ```py > from transformers import AutoModel @@ -28,17 +28,17 @@ rendered properly in your Markdown viewer. 
> > model = AutoModel.from_pretrained("bert-base-uncased") > # 모델 코드 수정 -> # 캐시를 지워 수정된 코드를 다시 로드 +> # 캐시를 지워 수정된 코드를 다시 가져오기 > clear_import_cache() -> # 업데이트된 코드를 사용하기 위해 다시 임포트 +> # 업데이트된 코드를 사용하기 위해 다시 가져오기 > model = AutoModel.from_pretrained("bert-base-uncased") > ``` -## 어텐션 클래스\[\[attention-class]] +## 어텐션 클래스[[attention-class]] [Segment Anything](./model_doc/sam)은 이미지 분할 모델로, 어텐션 메커니즘에서 query-key-value(`qkv`) 프로젝션을 결합합니다. 학습 가능한 파라미터 수와 연산 부담을 줄이기 위해 `qkv` 프로젝션에 LoRA를 적용할 수 있습니다. 이를 위해서는 `qkv` 프로젝션을 분리하여 `q`와 `v`에 LoRA를 개별적으로 적용해야 합니다. -1. 원래의 `SamVisionAttention` 클래스를 상속하여 `SamVisionAttentionSplit`이라는 커스텀 어텐션 클래스를 만듭니다. `__init__`에서 결합된 `qkv`를 삭제하고, `q`, `k`, `v`를 위한 개별 선형 레이어를 생성합니다. +1. 원래의 `SamVisionAttention` 클래스를 상속하여 `SamVisionAttentionSplit`이라는 사용자 정의 어텐션 클래스를 만듭니다. `__init__`에서 결합된 `qkv`를 삭제하고, `q`, `k`, `v`를 위한 개별 선형 레이어를 생성합니다. ```py import torch @@ -57,7 +57,7 @@ class SamVisionAttentionSplit(SamVisionAttention, nn.Module): self._register_load_state_dict_pre_hook(self.split_q_k_v_load_hook) ``` -2. `_split_qkv_load_hook` 함수는 사전 학습된 `qkv` 가중치를 모델 로딩 시 `q`, `k`, `v`로 분리하여 기존 사전 학습 모델과의 호환성을 보장합니다. +2. `split_q_k_v_load_hook` 함수는 모델을 가져올 때, 사전 훈련된 `qkv` 가중치를 `q`, `k`, `v`로 분리하여 사전 훈련된 모델과의 호환성을 보장합니다. 
```py def split_q_k_v_load_hook(self, state_dict, prefix, *args): @@ -70,7 +70,7 @@ class SamVisionAttentionSplit(SamVisionAttention, nn.Module): state_dict[key.replace("qkv.", "q.")] = q state_dict[key.replace("qkv.", "k.")] = k state_dict[key.replace("qkv.", "v.")] = v - # 기존 qkv 키 삭제 마크 + # 기존 qkv 키를 삭제 대상으로 표시 keys_to_delete.append(key) # 기존 qkv 키 제거 @@ -103,27 +103,27 @@ class SamVisionAttentionSplit(SamVisionAttention, nn.Module): return outputs ``` -커스텀 `SamVisionAttentionSplit` 클래스를 원본 모델의 `SamVisionAttention` 모듈에 할당하여 교체합니다. 모델 내 모든 `SamVisionAttention` 인스턴스는 분리된 어텐션 버전으로 대체됩니다. +사용자 정의 `SamVisionAttentionSplit` 클래스를 원본 모델의 `SamVisionAttention` 모듈에 할당하여 교체합니다. 모델 내 모든 `SamVisionAttention` 인스턴스는 분리된 어텐션 버전으로 대체됩니다. -\[`~PreTrainedModel.from_pretrained`]로 모델을 로드하세요. +[`~PreTrainedModel.from_pretrained`]로 모델을 가져오세요. ```py from transformers import SamModel -# 사전 학습된 SAM 모델 로드 +# 사전 훈련된 SAM 모델 가져오기 model = SamModel.from_pretrained("facebook/sam-vit-base") -# vision_encoder 모듈에서 어텐션 클래스 교체 +# 비전-인코더 모듈에서 어텐션 클래스 교체 for layer in model.vision_encoder.layers: if hasattr(layer, "attn"): layer.attn = SamVisionAttentionSplit(model.config.vision_config, model.config.vision_config.window_size) ``` -## LoRA\[\[lora]] +## LoRA[[lora]] -`q`, `k`, `v` 프로젝션을 분리한 후, `q`와 `v`에 LoRA를 적용합니다. +분리된 `q`, `k`, `v` 프로젝션을 사용할 때, `q`와 `v`에 LoRA를 적용합니다. -[LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig)를 생성하고, 순위 `r`, `lora_alpha`, `lora_dropout`, `task_type`을 지정하며, 가장 중요한 `target_modules`를 설정합니다. 
+[LoraConfig](https://huggingface.co/docs/peft/package_reference/config#peft.PeftConfig)를 생성하고, 랭크 `r`, `lora_alpha`, `lora_dropout`, `task_type`, 그리고 가장 중요한 적용될 모듈을 지정합니다. ```py from peft import LoraConfig, get_peft_model @@ -144,7 +144,7 @@ config = LoraConfig( model = get_peft_model(model, config) ``` -[print\_trainable\_parameters](https://huggingface.co/docs/peft/package_reference/peft_model#peft.PeftMixedModel.print_trainable_parameters)를 호출하여 훈련되는 파라미터 수와 전체 파라미터 대비 비율을 확인하세요. +[print_trainable_parameters](https://huggingface.co/docs/peft/package_reference/peft_model#peft.PeftMixedModel.print_trainable_parameters)를 호출하여 전체 파라미터 수 대비 훈련되는 파라미터 수를 확인하세요. ```py model.print_trainable_parameters()