|
10 | 10 | }, |
11 | 11 | { |
12 | 12 | "cell_type": "code", |
13 | | - "execution_count": 1, |
| 13 | + "execution_count": null, |
14 | 14 | "id": "d58c720d", |
15 | 15 | "metadata": {}, |
16 | 16 | "outputs": [], |
17 | 17 | "source": [ |
18 | 18 | "import json\n", |
19 | 19 | "import os\n", |
20 | 20 | "from datetime import date\n", |
21 | | - "from medcat.cat import CAT\n", |
22 | 21 | "from medcat.meta_cat import MetaCAT\n", |
23 | | - "from medcat.config_meta_cat import ConfigMetaCAT\n", |
24 | | - "from medcat.tokenizers.meta_cat_tokenizers import TokenizerWrapperBPE, TokenizerWrapperBERT\n", |
25 | | - "from tokenizers import ByteLevelBPETokenizer" |
| 22 | + "from medcat.config_meta_cat import ConfigMetaCAT" |
26 | 23 | ] |
27 | 24 | }, |
28 | 25 | { |
|
88 | 85 | }, |
89 | 86 | { |
90 | 87 | "cell_type": "markdown", |
91 | | - "id": "35aa5605", |
| 88 | + "id": "d4a3632b", |
| 89 | + "metadata": {}, |
| 90 | + "source": [ |
| 91 | + "<b> Note: </b> \n", |
| 92 | + " The name for the classification task can vary. <br> E.g: Task name for 'Experiencer' can be 'Subject'.\n", |
| 93 | + " <br><br>To accommodate this, we have a list that stores the variations for the alternate names. This attribute can be found under `mc.config.general.alternative_category_names`\n", |
| 94 | + "<br> E.g. for Experiencer, it will be pre-loaded as alternative_category_names = ['Experiencer','Subject']" |
| 95 | + ] |
| 96 | + }, |
| 97 | + { |
| 98 | + "cell_type": "markdown", |
| 99 | + "id": "d8bdc404", |
92 | 100 | "metadata": {}, |
93 | 101 | "source": [ |
94 | | - "Before you run the next section please double check that the model meta_annotation names matches to those specified in the mct export.\n", |
95 | | - "\n" |
| 102 | + "<b> Note: </b> \n", |
| 103 | + " The name for the classes can vary too. <br> E.g: For Presence task, the class name can be 'Not present (False)' or 'False'\n", |
| 104 | + " <br><br>To accommodate this, we have a mapping that stores the variations for the alternate names. This attribute can be found under `mc.config.general.alternative_class_names`\n", |
| 105 | + "<br> E.g. for Presence, it will be pre-loaded as alternative_class_names = [[\"Hypothetical (N/A)\",\"Hypothetical\"],[\"Not present (False)\",\"False\"],[\"Present (True)\",\"True\"]]" |
96 | 106 | ] |
97 | 107 | }, |
98 | 108 | { |
|
183 | 193 | " if class_wt_phase1:\n", |
184 | 194 | " mc.config.train['class_weights'] = class_wt_phase1\n", |
185 | 195 | "\n", |
186 | | - " mc.config.train['nepochs'] = 30 #You can change the number of epochs, remember to keep them higher for phase 1\n", |
| 196 | + " #You can change the number of epochs, remember to keep them higher for phase 1\n", |
| 197 | + " mc.config.train['nepochs'] = 40 \n", |
| 198 | + "\n", |
| 199 | + " # current model will be overwritten\n", |
| 200 | + " save_dir_path = os.path.join(base_dir_meta_models,\"meta_\"+meta_model)\n", |
| 201 | + " # to save the new model elsewhere, uncomment the below line\n", |
| 202 | + " #save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n", |
187 | 203 | "\n", |
188 | | - " save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n", |
189 | 204 | " results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n", |
190 | 205 | " # Save results\n", |
191 | 206 | " json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results_phase1.json'), 'w'))\n", |
|
202 | 217 | " if class_wt_phase2:\n", |
203 | 218 | " mc.config.train['class_weights'] = class_wt_phase2\n", |
204 | 219 | "\n", |
205 | | - " mc.config.train['nepochs'] = 15\n", |
| 220 | + " #You can change the number of epochs\n", |
| 221 | + " mc.config.train['nepochs'] = 20\n", |
206 | 222 | "\n", |
207 | | - " save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. Ensure to keep this same as Phase 1\n", |
| 223 | + " # Where to save the meta_model and results. Ensure to keep this same as Phase 1\n", |
| 224 | + " save_dir_path = os.path.join(base_dir_meta_models,\"meta_\"+meta_model)\n", |
| 225 | + " \n", |
208 | 226 | " results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n", |
209 | 227 | " # Save results\n", |
210 | 228 | " json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results_phase2.json'), 'w'))\n", |
211 | 229 | "\n", |
212 | 230 | "#--------------------------------Driver--------------------------------\n", |
213 | 231 | "for meta_model in meta_model_names:\n", |
214 | | - " #To use your own class weights instead of the pre-defined ones for the 2 phases, uncomment the below lines\n", |
215 | | - " '''class_wt_phase1 = []\n", |
216 | | - " class_wt_phase2 = []'''\n", |
| 232 | + " #To use your own class weights instead of the pre-defined ones for the 2 phases, put the weights in the lists below\n", |
| 233 | + " class_wt_phase1 = [] # Example [0.4,0.4,0.2]\n", |
| 234 | + " class_wt_phase2 = [] # Example [0.4,0.3,0.3]\n", |
217 | 235 | "\n", |
218 | 236 | " # Train 2 phase learning\n", |
219 | 237 | " logger.info(\"\\n********************Beginning Phase 1********************\")\n", |
|
257 | 275 | "# Follow all the same steps till initializing the metacat model\n", |
258 | 276 | "\n", |
259 | 277 | "# Initialise and train meta_model\n", |
260 | | - "mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n", |
| 278 | + "mc = MetaCAT.load(save_dir_path=os.path.join(base_dir_meta_models,\"meta_\"+meta_model))\n", |
261 | 279 | "\n", |
262 | 280 | "# the format expected is [[['text','of','the','document'], [index of medical entity], \"label\" ],\n", |
263 | 281 | "# ['text','of','the','document'], [index of medical entity], \"label\" ]]\n", |
|
0 commit comments