File tree Expand file tree Collapse file tree 2 files changed +53
-2
lines changed Expand file tree Collapse file tree 2 files changed +53
-2
lines changed Original file line number Diff line number Diff line change 66
66
LAYOUT_TABLE ,
67
67
LAYOUT_KEY_VALUE ,
68
68
)
69
+ from textractor .utils .legacy_utils import converter
69
70
70
71
THRESHOLD = 0.95
71
72
@@ -1542,7 +1543,6 @@ def parser_analyze_expense_response(response):
1542
1543
document .response = response
1543
1544
return document
1544
1545
1545
-
1546
1546
def parse (response : dict ) -> Document :
1547
1547
"""
1548
1548
Ingests response data and API Call Mode and calls the appropriate function for it.
@@ -1559,4 +1559,4 @@ def parse(response: dict) -> Document:
1559
1559
if "ExpenseDocuments" in response :
1560
1560
return parser_analyze_expense_response (response )
1561
1561
else :
1562
- return parse_document_api_response (response )
1562
+ return parse_document_api_response (converter ( response ) )
Original file line number Diff line number Diff line change
1
+ from textractor .data .constants import (
2
+ LAYOUT_FIGURE ,
3
+ LAYOUT_LIST ,
4
+ LAYOUT_TABLE ,
5
+ LAYOUT_KEY_VALUE ,
6
+ LAYOUT_TEXT ,
7
+ LAYOUT_TITLE ,
8
+ LAYOUT_HEADER ,
9
+ LAYOUT_FOOTER ,
10
+ LAYOUT_SECTION_HEADER ,
11
+ LAYOUT_PAGE_NUMBER ,
12
+ )
13
+
14
def _is_unrecognized_layout(block_type):
    """Return True when *block_type* is a ``LAYOUT_*`` type outside the known layout constants."""
    return block_type.startswith("LAYOUT_") and block_type not in (
        LAYOUT_TEXT,
        LAYOUT_TITLE,
        LAYOUT_HEADER,
        LAYOUT_FOOTER,
        LAYOUT_SECTION_HEADER,
        LAYOUT_PAGE_NUMBER,
        LAYOUT_LIST,
        LAYOUT_FIGURE,
        LAYOUT_TABLE,
        LAYOUT_KEY_VALUE,
    )


def converter(response):
    """Normalize the layout blocks of a response so only known layout types remain.

    The response is modified in place and the same object is returned.

    Rewrites applied to each entry of ``response["Blocks"]``:

    * a ``BlockType`` starting with ``"LAYOUT_FIGURE_"`` becomes ``LAYOUT_TEXT``;
    * any other ``"LAYOUT_"``-prefixed ``BlockType`` that is not one of the
      known layout constants becomes ``LAYOUT_FIGURE``;
    * a ``LAYOUT_FIGURE`` block whose ``EntityTypes`` contains ``"CONTAINER"``
      is removed, and its ``Id`` is dropped from the ``CHILD`` relationships
      of every ``PAGE`` block.

    :param response: Response dictionary holding a ``"Blocks"`` list of block
        dictionaries.
    :type response: dict
    :return: The same ``response`` object after normalization.
    :rtype: dict
    :raises KeyError: If ``response`` has no ``"Blocks"`` entry.
    """
    page_blocks = []
    doomed_indices = set()
    doomed_ids = set()

    for index, block in enumerate(response["Blocks"]):
        block_type = block.get("BlockType", "")
        if block_type == "PAGE":
            # Remember every page: container figures may belong to any of them.
            page_blocks.append(block)
        elif block_type.startswith("LAYOUT_FIGURE_"):
            block["BlockType"] = LAYOUT_TEXT
        elif _is_unrecognized_layout(block_type):
            block["BlockType"] = LAYOUT_FIGURE
        elif block_type == LAYOUT_FIGURE and "CONTAINER" in (
            block.get("EntityTypes") or ()
        ):
            doomed_indices.add(index)
            doomed_ids.add(block.get("Id"))

    if not doomed_indices:
        return response

    # Slice-assign so the caller's list object is preserved while the
    # container figures are dropped in a single pass.
    response["Blocks"][:] = [
        block
        for index, block in enumerate(response["Blocks"])
        if index not in doomed_indices
    ]

    # Strip the removed ids from each page's CHILD list; pages without
    # relationships (or without the id) are simply left as they are.
    for page in page_blocks:
        for relationship in page.get("Relationships") or ():
            if relationship.get("Type") == "CHILD" and relationship.get("Ids"):
                relationship["Ids"][:] = [
                    child_id
                    for child_id in relationship["Ids"]
                    if child_id not in doomed_ids
                ]

    return response
You can’t perform that action at this time.
0 commit comments