Skip to content

Commit 649c814

Browse files
committed
bias feature
1 parent b2cb0e7 commit 649c814

File tree

2 files changed

+8
-1
lines changed

2 files changed

+8
-1
lines changed

webstruct/features/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
'ZIPCODE', 'EMAIL', 'TEL', 'FAX', 'SUBJ', 'FUNC', 'HOURS'}
1616

1717
EXAMPLE_TOKEN_FEATURES = [
18+
bias,
1819
parent_tag,
1920
borders,
2021
block_length,

webstruct/features/token_features.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import re
44

55
__all__ = [
6+
'bias',
67
'token_identity',
78
'token_lower',
89
'token_shape',
@@ -15,6 +16,11 @@
1516
'SuffixFeatures',
1617
]
1718

19+
20+
def bias(html_token):
    """Return a constant feature that is the same for every token.

    The ``html_token`` argument is accepted so the function has the same
    one-argument signature as the other token feature functions in this
    module, but it is not inspected.

    Returns a new dict ``{'bias': 1}`` on every call.
    """
    return {'bias': 1}
22+
23+
1824
def token_identity(html_token):
    """Return the token's text as a feature.

    Reads the ``token`` attribute of ``html_token`` and returns it unchanged
    under the ``'token'`` key, i.e. ``{'token': html_token.token}``.
    """
    return {'token': html_token.token}
2026

@@ -112,4 +118,4 @@ def _shape(token):
112118
elif re.match('\w+$', token):
113119
return 'mixedcase'
114120
else:
115-
return 'other'
121+
return 'other'

0 commit comments

Comments
 (0)