Skip to content

Commit ad5ebac

Browse files
Pushing update for metacat training (#18)
* Pushing update for metacat training: revamped the training for both basic and advanced notebooks. * Pushing update — removing demo
1 parent 8286c3e commit ad5ebac

File tree

2 files changed

+84
-2568
lines changed

2 files changed

+84
-2568
lines changed

medcat/2_train_model/2_supervised_training/meta_annotation_training.ipynb

Lines changed: 9 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@
114114
"id": "83701c19",
115115
"metadata": {},
116116
"source": [
117-
"# For LSTM model"
117+
"# For LSTM and BERT model"
118118
]
119119
},
120120
{
@@ -125,68 +125,23 @@
125125
"outputs": [],
126126
"source": [
127127
"for meta_model in meta_model_names:\n",
128-
" vocab_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,'bbpe-vocab.json')\n",
129-
" merges_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,'bbpe-merges.txt')\n",
130-
" tokenizer = TokenizerWrapperBPE(ByteLevelBPETokenizer(vocab=vocab_file,\n",
131-
" merges=merges_file,\n",
132-
" lowercase=True))\n",
133-
" # load and sort out the config\n",
134-
" config_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,\"config.json\")\n",
135-
" with open(config_file, 'r') as jfile:\n",
136-
" config_dict = json.load(jfile)\n",
137-
" config = ConfigMetaCAT()\n",
138-
" for key, value in config_dict.items():\n",
139-
" setattr(config, key, value['py/state']['__dict__'])\n",
140-
" \n",
128+
" \n",
129+
" # load the meta_model\n",
130+
" mc = MetaCAT.load(save_dir_path=os.path.join(base_dir_meta_models,\"meta_\"+meta_model))\n",
131+
"\n",
132+
" # changing parameters\n",
133+
" mc.config.train['nepochs'] = 15\n",
134+
"\n",
141135
" save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
142136
" #Ideally this should replace the meta_models inside the modelpack\n",
143137
"\n",
144-
" # Initialise and train meta_model\n",
145-
" mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n",
138+
" # train the meta_model\n",
146139
" results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n",
147140
" \n",
148141
" # Save results\n",
149142
" json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results.json'), 'w'))"
150143
]
151144
},
152-
{
153-
"cell_type": "markdown",
154-
"id": "91ff4e28",
155-
"metadata": {},
156-
"source": [
157-
"# For BERT model"
158-
]
159-
},
160-
{
161-
"cell_type": "code",
162-
"execution_count": null,
163-
"id": "e255dda2",
164-
"metadata": {},
165-
"outputs": [],
166-
"source": [
167-
"for meta_model in meta_model_names:\n",
168-
" # load and sort out the config\n",
169-
" config_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,\"config.json\")\n",
170-
" with open(config_file, 'r') as jfile:\n",
171-
" config_dict = json.load(jfile)\n",
172-
" config = ConfigMetaCAT()\n",
173-
" for key, value in config_dict.items():\n",
174-
" setattr(config, key, value['py/state']['__dict__'])\n",
175-
"\n",
176-
" tokenizer = TokenizerWrapperBERT.load(os.path.join(base_dir_meta_models,\"meta_\"+meta_model), \n",
177-
" config.model['model_variant'])\n",
178-
" \n",
179-
" # change model name if training BERT for the first time\n",
180-
" config.model['model_name'] = 'bert'\n",
181-
" \n",
182-
" save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n",
183-
" #Ideally this should replace the meta_models inside the modelpack\n",
184-
"\n",
185-
" # Initialise and train meta_model\n",
186-
" mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n",
187-
" results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)"
188-
]
189-
},
190145
{
191146
"cell_type": "markdown",
192147
"id": "ab23e424",

0 commit comments

Comments (0)