You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: textractor/data/text_linearization_config.py
+4-4Lines changed: 4 additions & 4 deletions
Original file line number
Diff line number
Diff line change
@@ -68,6 +68,10 @@ class TextLinearizationConfig:
68
68
69
69
table_tabulate_remove_extra_hyphens: bool=False#: By default markdown tables will have N hyphens to preserve alignment; this reduces the number of hyphens to 1, which is the minimum number allowed by the GitHub Markdown spec
70
70
71
+
table_duplicate_text_in_merged_cells: bool=False#: Duplicate text in merged cells to preserve line alignment
72
+
73
+
table_flatten_headers: bool=False#: Flatten table headers into a single row, unmerging the cells horizontally
74
+
71
75
table_min_table_words: int=0#: Threshold below which tables will be rendered as words instead of using table layout
72
76
73
77
table_column_separator: str="\t"#: Table column separator, used when linearizing layout tables, not used if AnalyzeDocument was called with the TABLES feature
@@ -147,7 +151,3 @@ class TextLinearizationConfig:
147
151
add_prefixes_and_suffixes_as_words: bool=False#: Controls if the prefixes/suffixes will be inserted in the words returned by `get_text_and_words`
148
152
149
153
add_prefixes_and_suffixes_in_text: bool=True#: Controls if the prefixes/suffixes will be added to the linearized text
150
-
151
-
duplicate_text_in_merged_cells: bool=False#: Duplicate text in merged cells to preserve line alignment
152
-
153
-
table_flatten_headers: bool=False#: Flatten table headers into a single row, unmerging the cells horizontally
0 commit comments