Skip to content

Commit 80a6fb8

Browse files
pep8 style
1 parent 37b8728 commit 80a6fb8

File tree

1 file changed

+41
-27
lines changed

1 file changed

+41
-27
lines changed

webstruct/annotation_verifier.py

Lines changed: 41 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -11,14 +11,15 @@
1111
'EMAIL', 'PER', 'FUNC', 'SUBJ'
1212
]
1313

14+
1415
def nodes_difference(l, r):
1516
if l.tag != r.tag:
16-
return {'tag': '"{0}" != "{1}"'.format(l.tag, r.tag) }
17+
return {'tag': '"{0}" != "{1}"'.format(l.tag, r.tag)}
1718

1819
l_attrib = [(k, l.attrib[k]) for k in l.attrib]
19-
l_attrib.sort(key = lambda x:x[0])
20+
l_attrib.sort(key=lambda x: x[0])
2021
r_attrib = [(k, r.attrib[k]) for k in r.attrib]
21-
r_attrib.sort(key = lambda x:x[0])
22+
r_attrib.sort(key=lambda x: x[0])
2223

2324
idx = 0
2425
while idx < len(l_attrib) and idx < len(r_attrib):
@@ -27,13 +28,13 @@ def nodes_difference(l, r):
2728
idx = idx + 1
2829

2930
if l_attr != r_attr:
30-
return {'attributes' : '"{0}" != "{1}"'.format(l_attr, r_attr)}
31+
return {'attributes': '"{0}" != "{1}"'.format(l_attr, r_attr)}
3132

3233
if idx < len(l_attrib):
33-
return {'attributes' : "{0} != None".format(l_attrib[idx])}
34+
return {'attributes': "{0} != None".format(l_attrib[idx])}
3435

3536
if idx < len(r_attrib):
36-
return {'attributes' : "None != {0}".format(r_attrib[idx])}
37+
return {'attributes': "None != {0}".format(r_attrib[idx])}
3738

3839
l_text = ''
3940
if l.text:
@@ -44,7 +45,7 @@ def nodes_difference(l, r):
4445
r.text = r.text.strip()
4546

4647
if l_text != r_text:
47-
return {'text' : "{0} != {1}".format(l_text, r_text)}
48+
return {'text': "{0} != {1}".format(l_text, r_text)}
4849

4950
l_tail = ''
5051
if l.tail:
@@ -55,17 +56,18 @@ def nodes_difference(l, r):
5556
r.tail = r.tail.strip()
5657

5758
if l_tail != r_tail:
58-
return {'tail' : "{0} != {1}".format(l_tail, r_tail)}
59+
return {'tail': "{0} != {1}".format(l_tail, r_tail)}
5960

6061
if len(l) != len(r):
61-
return {'children count' : "{0} != {1}".format(len(l), len(r))}
62+
return {'children count': "{0} != {1}".format(len(l), len(r))}
6263

6364
return None
6465

66+
6567
def node_path(node):
6668
ret = ''
6769
current = node
68-
while current != None:
70+
while current is not None:
6971
parent = current.getparent()
7072
idx = 0
7173
if parent:
@@ -76,38 +78,50 @@ def node_path(node):
7678

7779
return ret
7880

81+
7982
def tree_difference(l, r):
8083
stack = [(l, r)]
8184
while stack:
8285
l_node, r_node = stack.pop(0)
8386
diff = nodes_difference(l_node, r_node)
8487

8588
if diff:
86-
return { "l" : node_path(l_node)
87-
, "r" : node_path(r_node)
88-
, "diff" : diff }
89+
return {"l": node_path(l_node),
90+
"r": node_path(r_node),
91+
"diff": diff}
8992

9093
for idx, l_child in enumerate(l_node):
9194
stack.append((l_child, r_node[idx]))
9295

9396
return None
9497

98+
9599
def main():
96100
cmdline = argparse.ArgumentParser()
97-
cmdline.add_argument('--gate', help = 'path to gate annotated file', type=str, required=True)
98-
cmdline.add_argument('--wa', help = 'path to wa annotated file', type=str, required=True)
99-
cmdline.add_argument('--loglevel', help = 'logging level', type=str, default='INFO')
101+
cmdline.add_argument('--gate',
102+
help='path to gate annotated file',
103+
type=str,
104+
required=True)
105+
cmdline.add_argument('--wa',
106+
help='path to wa annotated file',
107+
type=str,
108+
required=True)
109+
cmdline.add_argument('--loglevel',
110+
help='logging level',
111+
type=str,
112+
default='INFO')
100113
args = cmdline.parse_args()
101114

102-
logging.basicConfig( level = getattr(logging, args.loglevel.upper())
103-
, format = '%(asctime)s [%(levelname)s] %(pathname)s:%(lineno)d %(message)s' )
115+
logging.basicConfig(level=getattr(logging, args.loglevel.upper()),
116+
format=('%(asctime)s [%(levelname)s] '
117+
'%(pathname)s:%(lineno)d %(message)s'))
104118

105119
entities = KNOWN_ENTITIES
106120

107-
gate = webstruct.loaders.GateLoader(known_entities = entities)
108-
wa = webstruct.loaders.WebAnnotatorLoader(known_entities = entities)
121+
gate = webstruct.loaders.GateLoader(known_entities=entities)
122+
wa = webstruct.loaders.WebAnnotatorLoader(known_entities=entities)
109123

110-
tokenizer = webstruct.HtmlTokenizer(tagset = entities)
124+
tokenizer = webstruct.HtmlTokenizer(tagset=entities)
111125
with open(args.gate, 'rb') as reader:
112126
data = reader.read()
113127
gate_tree = gate.loadbytes(data)
@@ -125,20 +139,20 @@ def main():
125139
is_diff = True
126140

127141
annot_diff = list()
128-
for idx, (gate_a, wa_a) in enumerate(zip(gate_annotations, wa_annotations)):
142+
for idx, (gate_a, wa_a) in enumerate(zip(gate_annotations,
143+
wa_annotations)):
129144
if gate_a == wa_a:
130145
continue
131146

132-
annot_diff.append({ 'idx' : idx
133-
, 'gate_a' : gate_a
134-
, 'wa_a' : wa_a })
147+
annot_diff.append({'idx': idx,
148+
'gate_a': gate_a,
149+
'wa_a': wa_a})
135150

136151
if annot_diff:
137152
logging.error('annotation differs %s', json.dumps(annot_diff))
138153
is_diff = True
139154

140-
return is_diff == False
155+
return is_diff is False
141156

142157
if __name__ == "__main__":
143158
main()
144-

0 commit comments

Comments
 (0)