22from __future__ import absolute_import
33import unittest
44from webstruct import GateLoader , HtmlTokenizer , HtmlFeatureExtractor
5- from webstruct .features import token_lower , token_identity , Pattern
5+ from webstruct .features import token_lower , token_identity , looks_like_year , Pattern
66
77
88class PatternTest (unittest .TestCase ):
@@ -17,19 +17,20 @@ def _load_document(self):
1717 return html_tokens
1818
1919 def test_pattern (self ):
20+ #, (0, 'looks_like_year')
2021 featextractor = HtmlFeatureExtractor (
21- token_features = [token_lower , token_identity ],
22+ token_features = [token_lower , token_identity , looks_like_year ],
2223 global_features = [
23- Pattern ((- 2 , 'lower' ), (- 1 , 'lower' ))
24+ Pattern ((- 2 , 'lower' ), (- 1 , 'lower' ), ( - 1 , 'looks_like_year' ) )
2425 ]
2526 )
2627 X = featextractor .transform_single (self .html_tokens )
27-
28- key = 'lower[-2]/lower[-1]'
28+ key = 'lower[-2]/lower[-1]/looks_like_year[-1]'
2929 self .assertNotIn (key , X [0 ])
3030 self .assertListEqual (
3131 [feat [key ] for feat in X [1 :]],
32- ['?/hello' , 'hello/john' , 'john/doe' , 'doe/mary' ],
32+ ['?/hello/False' , 'hello/john/False' , 'john/doe/False' ,
33+ 'doe/mary/False' ],
3334 )
3435
3536 def test_pattern_lookups (self ):
0 commit comments