This repository was archived by the owner on Sep 23, 2025. It is now read-only.

Commit a328494: update
Signed-off-by: minmingzhu <minming.zhu@intel.com>
1 parent: 0ec9205

2 files changed (+35, -33 lines)

llm_on_ray/finetune/finetune_config.py

Lines changed: 2 additions & 1 deletion
@@ -166,6 +166,7 @@ class FinetuneConfig(BaseModel):
166  166        Dataset: Dataset
167  167        Training: Training
168  168
     169  +
169  170    base_models: Dict[str, FinetuneConfig] = {}
170  171    _models: Dict[str, FinetuneConfig] = {}
171  172

@@ -177,6 +178,6 @@ class FinetuneConfig(BaseModel):
177  178                continue
178  179            with open(file_path, "r") as f:
179  180                m: FinetuneConfig = parse_yaml_raw_as(FinetuneConfig, f)
180       -            _models[m.name] = m
     181  +            _models[m.General.base_model] = m
181  182
182  183    all_models = _models.copy()
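
The substantive change here re-keys the module-level _models registry: each config parsed from YAML is now stored under General.base_model rather than a top-level name attribute. A minimal sketch of the new keying, using a stripped-down stand-in for the real FinetuneConfig (the General/base_model structure comes from the diff above; the pydantic modelling details and the example model id are assumptions for illustration):

    # Stripped-down stand-in for the real config classes; only the
    # General.base_model keying mirrors the diff, everything else is illustrative.
    from typing import Dict

    from pydantic import BaseModel


    class General(BaseModel):
        base_model: str


    class FinetuneConfig(BaseModel):
        General: General


    _models: Dict[str, FinetuneConfig] = {}

    cfg = FinetuneConfig(General=General(base_model="example-org/example-7b"))

    # Before this commit the entry was keyed by a top-level name attribute;
    # after it, the base model id is the lookup key.
    _models[cfg.General.base_model] = cfg
    assert "example-org/example-7b" in _models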

llm_on_ray/ui/start_ui.py

Lines changed: 33 additions & 32 deletions
@@ -110,20 +110,20 @@ def get_result(self):
110  110
111  111    class ChatBotUI:
112  112        def __init__(
113       -        self,
114       -        all_models: Dict[str, InferenceConfig],
115       -        base_models: Dict[str, FinetuneConfig],
116       -        finetune_model_path: str,
117       -        finetuned_checkpoint_path: str,
118       -        repo_code_path: str,
119       -        default_data_path: str,
120       -        default_rag_path: str,
121       -        config: dict,
122       -        head_node_ip: str,
123       -        node_port: str,
124       -        node_user_name: str,
125       -        conda_env_name: str,
126       -        master_ip_port: str,
     113  +        self,
     114  +        all_models: Dict[str, InferenceConfig],
     115  +        base_models: Dict[str, FinetuneConfig],
     116  +        finetune_model_path: str,
     117  +        finetuned_checkpoint_path: str,
     118  +        repo_code_path: str,
     119  +        default_data_path: str,
     120  +        default_rag_path: str,
     121  +        config: dict,
     122  +        head_node_ip: str,
     123  +        node_port: str,
     124  +        node_user_name: str,
     125  +        conda_env_name: str,
     126  +        master_ip_port: str,
127  127        ):
128  128            self._all_models = all_models
129  129            self._base_models = base_models

@@ -556,14 +556,15 @@ def finetune(
556  556            finetune_config = self._base_models[model_name]
557  557            gpt_base_model = finetune_config.General.gpt_base_model
558  558
559       -
560  559            finetune_config = finetune_config.dict()
561  560            last_gpt_base_model = False
562  561            finetuned_model_path = os.path.join(self.finetuned_model_path, model_name, new_model_name)
563  562
564  563            exist_worker = int(finetune_config["Training"].get("num_training_workers"))
565  564
566       -        exist_cpus_per_worker_ftn = int(finetune_config["Training"].get("resources_per_worker")["CPU"])
     565  +        exist_cpus_per_worker_ftn = int(
     566  +            finetune_config["Training"].get("resources_per_worker")["CPU"]
     567  +        )
567  568
568  569            ray_resources = ray.available_resources()
569  570            if "CPU" not in ray_resources or cpus_per_worker_ftn * worker_num + 1 > int(
@@ -602,9 +603,9 @@ def finetune(
602  603
603  604            finetune_config["Dataset"]["train_file"] = dataset
604  605            if origin_model_path is not None:
605       -            finetune_config["General"]["base_model"] = origin_model_path
     606  +            finetune_config["General"]["base_model"] = origin_model_path
606  607            if tokenizer_path is not None:
607       -            finetune_config["General"]["tokenizer_name"] = tokenizer_path
     608  +            finetune_config["General"]["tokenizer_name"] = tokenizer_path
608  609            finetune_config["Training"]["epochs"] = num_epochs
609  610            finetune_config["General"]["output_dir"] = finetuned_model_path
610  611

@@ -698,30 +699,30 @@ def finetune_progress(self, progress=gr.Progress()):
698  699                    progress(
699  700                        float(int(value_step) / int(total_steps)),
700  701                        desc="Start Training: epoch "
701       -                    + str(value_epoch)
702       -                    + " / "
703       -                    + str(total_epochs)
704       -                    + " "
705       -                    + "step "
706       -                    + str(value_step)
707       -                    + " / "
708       -                    + str(total_steps),
     702  +                    + str(value_epoch)
     703  +                    + " / "
     704  +                    + str(total_epochs)
     705  +                    + " "
     706  +                    + "step "
     707  +                    + str(value_step)
     708  +                    + " / "
     709  +                    + str(total_steps),
709  710                    )
710  711                except Exception:
711  712                    pass
712  713            self.finetune_status = False
713  714            return "<h4 style='text-align: left; margin-bottom: 1rem'>Completed the fine-tuning process.</h4>"
714  715
715  716        def deploy_func(
716       -        self,
717       -        model_name: str,
718       -        replica_num: int,
719       -        cpus_per_worker_deploy: int,
720       -        hpus_per_worker_deploy: int,
     717  +        self,
     718  +        model_name: str,
     719  +        replica_num: int,
     720  +        cpus_per_worker_deploy: int,
     721  +        hpus_per_worker_deploy: int,
721  722        ):
722  723            self.shutdown_deploy()
723  724            if cpus_per_worker_deploy * replica_num > int(
724       -            ray.available_resources()["CPU"]
     725  +            ray.available_resources()["CPU"]
725  726            ) or hpus_per_worker_deploy * replica_num > int(
726  727                ray.available_resources()["HPU"] if "HPU" in ray.available_resources() else 0
727  728            ):
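
The condition closing this hunk gates deployment on both CPU and HPU headroom, treating a cluster that exposes no HPU resource as having zero. A minimal sketch of the same capacity test, again built only on ray.available_resources(); the helper name and any example values are assumptions:

    # Deploy-time capacity check: both the CPU pool and the (possibly absent)
    # HPU pool must cover every replica. Only ray.available_resources() is the
    # real Ray API; the helper name is illustrative.
    import ray


    def has_deploy_capacity(cpus_per_replica: int, hpus_per_replica: int, replica_num: int) -> bool:
        resources = ray.available_resources()
        free_cpus = int(resources.get("CPU", 0))
        free_hpus = int(resources.get("HPU", 0))  # 0 on clusters without HPU nodes
        return (
            cpus_per_replica * replica_num <= free_cpus
            and hpus_per_replica * replica_num <= free_hpus
        )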
