Skip to content

Commit a9c1926

Browse files
authored
Handle null EntityTypes
2 parents 28d6110 + 2699fc4 commit a9c1926

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

textractor/parsers/response_parser.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,11 @@ def _filter_by_entity(
116116
return {
117117
block["Id"]: block
118118
for block in block_json
119-
if block["EntityTypes"][0] == entity_type
119+
if (
120+
"EntityTypes" in block and
121+
len(block["EntityTypes"]) and
122+
block["EntityTypes"][0] == entity_type
123+
)
120124
}
121125

122126

@@ -829,6 +833,7 @@ def _create_table_cell_objects(
829833

830834
table_cells = {}
831835
for elem_id, elem in all_table_cells_info.items():
836+
entity_types = elem.get("EntityTypes", []) or []
832837
table_cells[elem_id] = TableCell(
833838
entity_id=elem_id,
834839
bbox=BoundingBox.from_normalized_dict(
@@ -839,11 +844,11 @@ def _create_table_cell_objects(
839844
row_span=elem["RowSpan"],
840845
col_span=elem["ColumnSpan"],
841846
confidence=elem["Confidence"],
842-
is_column_header=COLUMN_HEADER in elem.get("EntityTypes", []),
843-
is_title=TABLE_TITLE in elem.get("EntityTypes", []),
844-
is_footer=TABLE_FOOTER in elem.get("EntityTypes", []),
845-
is_summary=TABLE_SUMMARY in elem.get("EntityTypes", []),
846-
is_section_title=TABLE_SECTION_TITLE in elem.get("EntityTypes", []),
847+
is_column_header=COLUMN_HEADER in entity_types,
848+
is_title=TABLE_TITLE in entity_types,
849+
is_footer=TABLE_FOOTER in entity_types,
850+
is_summary=TABLE_SUMMARY in entity_types,
851+
is_section_title=TABLE_SECTION_TITLE in entity_types,
847852
)
848853
table_cells[elem_id].raw_object = elem
849854

@@ -897,9 +902,9 @@ def _create_table_objects(
897902
),
898903
)
899904
# Setting table type based on the entity types present in the table
900-
if TABLE_STRUCTURED in val.get("EntityTypes", []):
905+
if TABLE_STRUCTURED in (val.get("EntityTypes", []) or []):
901906
tables[val["Id"]].table_type = TableTypes.STRUCTURED
902-
elif TABLE_SEMI_STRUCTURED in val.get("EntityTypes", []):
907+
elif TABLE_SEMI_STRUCTURED in (val.get("EntityTypes", []) or []):
903908
tables[val["Id"]].table_type = TableTypes.SEMI_STRUCTURED
904909
else:
905910
tables[val["Id"]].table_type = TableTypes.UNKNOWN
@@ -979,7 +984,7 @@ def _create_table_objects(
979984
table_cells[cell_id]._children.append(checkboxes[child_id])
980985

981986
# update metadata
982-
meta_info = cell.get("EntityTypes", [])
987+
meta_info = cell.get("EntityTypes", []) or []
983988
merged_info = [MERGED_CELL] if cell_id in merged_child_ids else []
984989
table_cells[cell_id]._update_response_metadata(meta_info + merged_info)
985990

0 commit comments

Comments
 (0)