Skip to content

Commit e6f59d9

Browse files
authored
Replace logging calls with module logger
2 parents 5980c40 + 42016ab commit e6f59d9

File tree

10 files changed

+75
-73
lines changed

10 files changed

+75
-73
lines changed

textractor/entities/document.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from textractor.data.html_linearization_config import HTMLLinearizationConfig
4040
from textractor.entities.linearizable import Linearizable
4141

42+
logger = logging.getLogger(__name__)
4243

4344
class Document(SpatialObject, Linearizable):
4445
"""
@@ -389,7 +390,7 @@ def get_words_by_type(self, text_type: TextTypes = TextTypes.PRINTED) -> List[Wo
389390
:rtype: EntityList[Word]
390391
"""
391392
if not self.words:
392-
logging.warn("Document contains no word entities.")
393+
logger.warning("Document contains no word entities.")
393394
return []
394395

395396
filtered_words = EntityList()
@@ -554,12 +555,12 @@ def get(
554555
lowest_similarity = top_n[-1][1]
555556

556557
if not top_n:
557-
logging.warning(
558+
logger.warning(
558559
f"Query key does not match any existing keys in the document.{os.linesep}{self.keys()}"
559560
)
560561
return EntityList([])
561562

562-
logging.info(f"Query key matched {len(top_n)} key-values in the document.")
563+
logger.info(f"Query key matched {len(top_n)} key-values in the document.")
563564

564565
return EntityList([value[0] for value in top_n])
565566

@@ -586,14 +587,14 @@ def export_kv_to_csv(
586587
keys = []
587588
values = []
588589
if include_kv and not self.key_values:
589-
logging.warning("Document does not contain key-values.")
590+
logger.warning("Document does not contain key-values.")
590591
elif include_kv:
591592
for kv in self.key_values:
592593
keys.append(" ".join([w.text for w in kv.key]))
593594
values.append(kv.value.get_text())
594595

595596
if include_checkboxes and not self.checkboxes:
596-
logging.warning("Document does not contain checkbox elements.")
597+
logger.warning("Document does not contain checkbox elements.")
597598
elif include_checkboxes:
598599
for kv in self.checkboxes:
599600
keys.append(" ".join([w.text for w in kv.key]))
@@ -604,7 +605,7 @@ def export_kv_to_csv(
604605
for k, v in zip(keys, values):
605606
f.write(f"{k}{sep}{v}{os.linesep}")
606607

607-
logging.info(
608+
logger.info(
608609
f"csv file stored at location {os.path.join(os.getcwd(),filepath)}"
609610
)
610611

@@ -670,7 +671,7 @@ def export_kv_to_txt(
670671
export_str = ""
671672
index = 1
672673
if include_kv and not self.key_values:
673-
logging.warning("Document does not contain key-values.")
674+
logger.warning("Document does not contain key-values.")
674675
elif include_kv:
675676
for kv in self.key_values:
676677
export_str += (
@@ -679,15 +680,15 @@ def export_kv_to_txt(
679680
index += 1
680681

681682
if include_checkboxes and not self.checkboxes:
682-
logging.warning("Document does not contain checkbox elements.")
683+
logger.warning("Document does not contain checkbox elements.")
683684
elif include_checkboxes:
684685
for kv in self.checkboxes:
685686
export_str += f"{index}. {kv.key.__repr__()} : {kv.value.children[0].status.name}{os.linesep}"
686687
index += 1
687688

688689
with open(filepath, "w") as text_file:
689690
text_file.write(export_str)
690-
logging.info(
691+
logger.info(
691692
f"txt file stored at location {os.path.join(os.getcwd(),filepath)}"
692693
)
693694

@@ -700,7 +701,7 @@ def export_tables_to_excel(self, filepath):
700701
:type filepath: str, required
701702
"""
702703
if not filepath:
703-
logging.error("Filepath required to store excel file.")
704+
logger.error("Filepath required to store excel file.")
704705
workbook = xlsxwriter.Workbook(filepath)
705706
for table in self.tables:
706707
workbook = table.to_excel(
@@ -714,7 +715,7 @@ def independent_words(self):
714715
:rtype: EntityList[Word]
715716
"""
716717
if not self.words:
717-
logging.warning("Words have not been assigned to this Document object.")
718+
logger.warning("Words have not been assigned to this Document object.")
718719
return []
719720

720721
else:
@@ -867,7 +868,7 @@ def _get_coords(self, word_1, word_2, direction, page):
867868
)
868869

869870
if not word_1_objects:
870-
logging.warning(f"{word_1} not found in page {page}")
871+
logger.warning(f"{word_1} not found in page {page}")
871872
return -1, -1, -1, -1
872873
else:
873874
word_1_obj = word_1_objects[0]
@@ -882,7 +883,7 @@ def _get_coords(self, word_1, word_2, direction, page):
882883
)
883884
word_2_objects = [word for word in word_2_objects if word.page == page]
884885
if not word_2_objects:
885-
logging.warning(f"{word_2} not found in page {page}")
886+
logger.warning(f"{word_2} not found in page {page}")
886887
return -1, -1, -1, -1
887888
else:
888889
word_2_obj = word_2_objects[0]

textractor/entities/expense_field.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from textractor.data.constants import AnalyzeExpenseLineItemFields as AELineItems
99
from typing import List, Tuple
1010

11+
logger = logging.getLogger(__name__)
1112

1213
@dataclasses.dataclass
1314
class ExpenseType:
@@ -257,7 +258,7 @@ def to_pandas(self, include_EXPENSE_ROW=False):
257258
try:
258259
from pandas import DataFrame
259260
except ImportError:
260-
logging.info(
261+
logger.info(
261262
"pandas library is required for exporting tables to DataFrame objects"
262263
)
263264
return None

textractor/entities/identity_document.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
"""The IdentityDocument class is the object representation of an AnalyzeID response. It is similar to a dictionary. Despite its name, it does not inherit from Document, as the AnalyzeID response does not contain position information."""
22

33
import os
4-
import string
5-
import logging
6-
import xlsxwriter
74
from typing import List, Dict, Union
8-
from copy import deepcopy
9-
from collections import defaultdict
105
from textractor.data.constants import AnalyzeIDFields
116
from textractor.entities.bbox import SpatialObject
127
from textractor.entities.identity_field import IdentityField

textractor/entities/key_value.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from textractor.visualizers.entitylist import EntityList
2121
from textractor.utils.html_utils import add_id_to_html_tag
2222

23+
logger = logging.getLogger(__name__)
2324

2425
class KeyValue(DocumentEntity):
2526
"""
@@ -101,7 +102,7 @@ def key(self):
101102
:rtype: EntityList[Word]
102103
"""
103104
if not self._words:
104-
logging.info("Key contains no words objects.")
105+
logger.info("Key contains no words objects.")
105106
return []
106107
return self._words
107108

@@ -123,7 +124,7 @@ def value(self) -> Value:
123124
:rtype: Value
124125
"""
125126
if self._value is None:
126-
logging.warning(
127+
logger.warning(
127128
"Asked for a value but it was never attributed "
128129
"-> make sure to assign value to key with the `kv.value = <Value Object>` property setter"
129130
)
@@ -193,7 +194,7 @@ def get_words_by_type(self, text_type: str = TextTypes.PRINTED) -> List[Word]:
193194
)
194195

195196
if not self.words:
196-
logging.info("Document contains no word entities.")
197+
logger.info("Document contains no word entities.")
197198
return []
198199
else:
199200
return EntityList(
@@ -211,12 +212,12 @@ def is_selected(self) -> bool:
211212
if len(self.value.children) == 1:
212213
return self.value.children[0].is_selected()
213214
else:
214-
logging.info(
215+
logger.info(
215216
"is_checked() was called on a KeyValue that contains more than one checkbox. Returning first checkbox"
216217
)
217218
return self.value.children[0].is_selected()
218219
else:
219-
logging.info(
220+
logger.info(
220221
"is_checked() was called on a KeyValue that does not contain checkboxes. Returning False"
221222
)
222223
return False

textractor/entities/page.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from textractor.visualizers.entitylist import EntityList
4545
from textractor.entities.linearizable import Linearizable
4646

47+
logger = logging.getLogger(__name__)
4748

4849
class Page(SpatialObject, Linearizable):
4950
"""
@@ -409,7 +410,7 @@ def filter_checkboxes(
409410
:rtype: EntityList[KeyValue]
410411
"""
411412
if not self.checkboxes:
412-
logging.warning(f"This document does not contain checkboxes")
413+
logger.warning(f"This document does not contain checkboxes")
413414
return []
414415
else:
415416
if selected and not_selected:
@@ -450,7 +451,7 @@ def get_words_by_type(
450451
)
451452

452453
if not self.words:
453-
logging.warn("Document contains no word entities.")
454+
logger.warning("Document contains no word entities.")
454455
return []
455456

456457
filtered_words = [word for word in self.words if word.text_type == text_type]
@@ -724,11 +725,11 @@ def get(
724725
lowest_similarity = top_n[-1][1]
725726

726727
if not top_n:
727-
logging.warning(
728+
logger.warning(
728729
f"Query key does not match any existing keys in the document.{os.linesep}{self.keys()}"
729730
)
730731

731-
logging.info(f"Query key matched {len(top_n)} key-values in the document.")
732+
logger.info(f"Query key matched {len(top_n)} key-values in the document.")
732733

733734
return EntityList([value[0] for value in top_n])
734735

@@ -755,14 +756,14 @@ def export_kv_to_csv(
755756
keys = []
756757
values = []
757758
if include_kv and not self.key_values:
758-
logging.warning("Document does not contain key-values.")
759+
logger.warning("Document does not contain key-values.")
759760
elif include_kv:
760761
for kv in self.key_values:
761762
keys.append(kv.key.__repr__())
762763
values.append(kv.value.__repr__())
763764

764765
if include_checkboxes and not self.checkboxes:
765-
logging.warning("Document does not contain checkbox elements.")
766+
logger.warning("Document does not contain checkbox elements.")
766767
elif include_checkboxes:
767768
for kv in self.checkboxes:
768769
keys.append(kv.key.__repr__())
@@ -773,7 +774,7 @@ def export_kv_to_csv(
773774
for k, v in zip(keys, values):
774775
f.write(f"{k}{sep}{v}{os.linesep}")
775776

776-
logging.info(
777+
logger.info(
777778
f"csv file stored at location {os.path.join(os.getcwd(), filepath)}"
778779
)
779780

@@ -796,7 +797,7 @@ def export_kv_to_txt(
796797
export_str = []
797798
index = 1
798799
if include_kv and not self.key_values:
799-
logging.warning("Document does not contain key-values.")
800+
logger.warning("Document does not contain key-values.")
800801
elif include_kv:
801802
for kv in self.key_values:
802803
export_str.append(
@@ -805,7 +806,7 @@ def export_kv_to_txt(
805806
index += 1
806807

807808
if include_checkboxes and not self.checkboxes:
808-
logging.warning("Document does not contain checkbox elements.")
809+
logger.warning("Document does not contain checkbox elements.")
809810
elif include_checkboxes:
810811
for kv in self.checkboxes:
811812
export_str.append(
@@ -815,7 +816,7 @@ def export_kv_to_txt(
815816

816817
with open(filepath, "w") as text_file:
817818
text_file.write("".join(export_str))
818-
logging.info(
819+
logger.info(
819820
f"txt file stored at location {os.path.join(os.getcwd(),filepath)}"
820821
)
821822

@@ -825,7 +826,7 @@ def independent_words(self) -> EntityList[Word]:
825826
:rtype: EntityList[Word]
826827
"""
827828
if not self.words:
828-
logging.warning("Words have not been assigned to this Document object.")
829+
logger.warning("Words have not been assigned to this Document object.")
829830
return []
830831

831832
else:
@@ -848,7 +849,7 @@ def export_tables_to_excel(self, filepath):
848849
:type filepath: str, required
849850
"""
850851
if not filepath:
851-
logging.error("Filepath required to store excel file.")
852+
logger.error("Filepath required to store excel file.")
852853
workbook = xlsxwriter.Workbook(filepath)
853854
for table in self.tables:
854855
workbook = table.to_excel(
@@ -1018,7 +1019,7 @@ def _get_coords(self, word_1, word_2, direction):
10181019
)
10191020

10201021
if not word_1_objects:
1021-
logging.warning(f"{word_1} not found in page")
1022+
logger.warning(f"{word_1} not found in page")
10221023
return -1, -1, -1, -1
10231024
else:
10241025
word_1_obj = word_1_objects[0]
@@ -1033,7 +1034,7 @@ def _get_coords(self, word_1, word_2, direction):
10331034
)
10341035

10351036
if not word_2_objects:
1036-
logging.warning(f"{word_2} not found in page")
1037+
logger.warning(f"{word_2} not found in page")
10371038
return -1, -1, -1, -1
10381039
else:
10391040
word_2_obj = word_2_objects[0]

textractor/entities/signature.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
bounding box information, page number, Page ID and confidence of detection.
77
"""
88

9-
import logging
10-
from typing import List
119
import uuid
1210
from textractor.data.text_linearization_config import TextLinearizationConfig
1311

0 commit comments

Comments
 (0)