File tree Expand file tree Collapse file tree 2 files changed +34
-1
lines changed
Expand file tree Collapse file tree 2 files changed +34
-1
lines changed Original file line number Diff line number Diff line change @@ -89,6 +89,11 @@ def word_tokenize(text,engine='newmm'):
8989 ใช้ Deep Neural Network ในการตัดคำภาษาไทย
9090 '''
9191 from .deepcut import segment
92+ elif engine == 'cutkum' :
93+ '''
94+ ใช้ Deep Neural Network ในการตัดคำภาษาไทย (https://github.com/pucktada/cutkum)
95+ '''
96+ from .cutkum import segment
9297 elif engine == 'wordcutpy' :
9398 '''
9499 wordcutpy ใช้ wordcutpy (https://github.com/veer66/wordcutpy) ในการตัดคำ
@@ -161,4 +166,4 @@ def syllable_tokenize(text1):
161166 i += 1
162167 else :
163168 data = dict_word_tokenize (text = text1 ,data = get_data (),data_type = "list" )
164- return data
169+ return data
Original file line number Diff line number Diff line change 1+ # -*- coding: utf-8 -*-
2+ from __future__ import absolute_import ,unicode_literals
3+ import sys
4+ import os
try:
    from cutkum.tokenizer import Cutkum
except ImportError:
    # cutkum is not installed yet: try to install it, then retry the import.
    # pip offers no supported in-process API (pip.main is gone from the
    # public namespace since pip 10), so pip is run as a subprocess of the
    # interpreter that is executing this module.
    import subprocess
    subprocess.call([sys.executable, '-m', 'pip', 'install', 'cutkum'])
    try:
        from cutkum.tokenizer import Cutkum
    except ImportError:
        # Installation did not make the package importable; stop with the
        # same message as before so the user can install it manually.
        sys.exit('Error ! using pip install cutkum')
def get_model():
    """Return the local path of the cutkum model file, downloading it if absent.

    The model is stored as ``lstm.l6.d2.pb`` inside the ``pythainlp-data``
    directory under the user's home directory. The directory is created when
    missing. When the model file does not exist yet it is fetched from the
    cutkum GitHub repository.

    Returns:
        The path of the model file.

    Raises:
        OSError: if the data directory cannot be created, or the download
            fails (urllib's errors derive from it on Python 3).
    """
    # urlretrieve lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    data_dir = os.path.join(os.path.expanduser("~"), 'pythainlp-data')
    if not os.path.isdir(data_dir):
        try:
            os.makedirs(data_dir)
        except OSError:
            # Another process may have created the directory in between the
            # check and the call; only re-raise when it is still missing.
            if not os.path.isdir(data_dir):
                raise

    path = os.path.join(data_dir, 'lstm.l6.d2.pb')
    if not os.path.exists(path):
        print("Download models...")
        # Download to a temporary name first so that an interrupted transfer
        # never leaves a truncated file at the final path, which a later call
        # would mistake for a complete model.
        partial = path + '.part'
        try:
            urlretrieve("https://raw.githubusercontent.com/pucktada/cutkum/master/model/lstm.l6.d2.pb", partial)
            os.rename(partial, path)
        finally:
            # After a successful rename the partial file no longer exists;
            # after a failure this removes the incomplete download.
            if os.path.exists(partial):
                os.remove(partial)
        print("OK.")
    return path
# Build the shared tokenizer once, at import time, from the model path
# returned by get_model() (which may download the model on first use).
_model_path = get_model()
ck = Cutkum(_model_path)


def segment(text):
    """Tokenize *text* with the module-level Cutkum instance.

    Returns whatever ``ck.tokenize`` returns for the given text.
    """
    tokens = ck.tokenize(text)
    return tokens
You can’t perform that action at this time.
0 commit comments