|
114 | 114 | "id": "83701c19", |
115 | 115 | "metadata": {}, |
116 | 116 | "source": [ |
117 | | - "# For LSTM model" |
| 117 | + "# For LSTM and BERT models" |
118 | 118 | ] |
119 | 119 | }, |
120 | 120 | { |
|
125 | 125 | "outputs": [], |
126 | 126 | "source": [ |
127 | 127 | "for meta_model in meta_model_names:\n", |
128 | | - " vocab_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,'bbpe-vocab.json')\n", |
129 | | - " merges_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,'bbpe-merges.txt')\n", |
130 | | - " tokenizer = TokenizerWrapperBPE(ByteLevelBPETokenizer(vocab=vocab_file,\n", |
131 | | - " merges=merges_file,\n", |
132 | | - " lowercase=True))\n", |
133 | | - " # load and sort out the config\n", |
134 | | - " config_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,\"config.json\")\n", |
135 | | - " with open(config_file, 'r') as jfile:\n", |
136 | | - " config_dict = json.load(jfile)\n", |
137 | | - " config = ConfigMetaCAT()\n", |
138 | | - " for key, value in config_dict.items():\n", |
139 | | - " setattr(config, key, value['py/state']['__dict__'])\n", |
140 | | - " \n", |
| 128 | + " \n", |
| 129 | + " # load the meta_model\n", |
| 130 | + " mc = MetaCAT.load(save_dir_path=os.path.join(base_dir_meta_models,\"meta_\"+meta_model))\n", |
| 131 | + "\n", |
| 132 | + " # changing parameters\n", |
| 133 | + " mc.config.train['nepochs'] = 15\n", |
| 134 | + "\n", |
141 | 135 | " save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n", |
142 | 136 | "    # Ideally this should replace the meta_models inside the modelpack\n", |
143 | 137 | "\n", |
144 | | - " # Initialise and train meta_model\n", |
145 | | - " mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n", |
| 138 | + " # train the meta_model\n", |
146 | 139 | " results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)\n", |
147 | 140 | " \n", |
148 | 141 | " # Save results\n", |
149 | 142 | " json.dump(results['report'], open(os.path.join(save_dir_path,'meta_'+meta_model+'_results.json'), 'w'))" |
150 | 143 | ] |
151 | 144 | }, |
152 | | - { |
153 | | - "cell_type": "markdown", |
154 | | - "id": "91ff4e28", |
155 | | - "metadata": {}, |
156 | | - "source": [ |
157 | | - "# For BERT model" |
158 | | - ] |
159 | | - }, |
160 | | - { |
161 | | - "cell_type": "code", |
162 | | - "execution_count": null, |
163 | | - "id": "e255dda2", |
164 | | - "metadata": {}, |
165 | | - "outputs": [], |
166 | | - "source": [ |
167 | | - "for meta_model in meta_model_names:\n", |
168 | | - " # load and sort out the config\n", |
169 | | - " config_file = os.path.join(base_dir_meta_models,\"meta_\"+meta_model,\"config.json\")\n", |
170 | | - " with open(config_file, 'r') as jfile:\n", |
171 | | - " config_dict = json.load(jfile)\n", |
172 | | - " config = ConfigMetaCAT()\n", |
173 | | - " for key, value in config_dict.items():\n", |
174 | | - " setattr(config, key, value['py/state']['__dict__'])\n", |
175 | | - "\n", |
176 | | - " tokenizer = TokenizerWrapperBERT.load(os.path.join(base_dir_meta_models,\"meta_\"+meta_model), \n", |
177 | | - " config.model['model_variant'])\n", |
178 | | - " \n", |
179 | | - " # change model name if training BERT for the first time\n", |
180 | | - " config.model['model_name'] = 'bert'\n", |
181 | | - " \n", |
182 | | - " save_dir_path= \"test_meta_\"+meta_model # Where to save the meta_model and results. \n", |
183 | | - " #Ideally this should replace the meta_models inside the modelpack\n", |
184 | | - "\n", |
185 | | - " # Initialise and train meta_model\n", |
186 | | - " mc = MetaCAT(tokenizer=tokenizer, embeddings=None, config=config)\n", |
187 | | - " results = mc.train_from_json(mctrainer_export_path, save_dir_path=save_dir_path)" |
188 | | - ] |
189 | | - }, |
190 | 145 | { |
191 | 146 | "cell_type": "markdown", |
192 | 147 | "id": "ab23e424", |
|
0 commit comments