Skip to content

Commit 42016ab

Browse files
committed
Replace logging calls with module logger
1 parent 217d5ce commit 42016ab

File tree

10 files changed

+75
-73
lines changed

10 files changed

+75
-73
lines changed

textractor/entities/document.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
from textractor.data.html_linearization_config import HTMLLinearizationConfig
4040
from textractor.entities.linearizable import Linearizable
4141

42+
logger = logging.getLogger(__name__)
4243

4344
class Document(SpatialObject, Linearizable):
4445
"""
@@ -389,7 +390,7 @@ def get_words_by_type(self, text_type: TextTypes = TextTypes.PRINTED) -> List[Wo
389390
:rtype: EntityList[Word]
390391
"""
391392
if not self.words:
392-
logging.warn("Document contains no word entities.")
393+
logger.warning("Document contains no word entities.")
393394
return []
394395

395396
filtered_words = EntityList()
@@ -554,12 +555,12 @@ def get(
554555
lowest_similarity = top_n[-1][1]
555556

556557
if not top_n:
557-
logging.warning(
558+
logger.warning(
558559
f"Query key does not match any existing keys in the document.{os.linesep}{self.keys()}"
559560
)
560561
return EntityList([])
561562

562-
logging.info(f"Query key matched {len(top_n)} key-values in the document.")
563+
logger.info(f"Query key matched {len(top_n)} key-values in the document.")
563564

564565
return EntityList([value[0] for value in top_n])
565566

@@ -586,14 +587,14 @@ def export_kv_to_csv(
586587
keys = []
587588
values = []
588589
if include_kv and not self.key_values:
589-
logging.warning("Document does not contain key-values.")
590+
logger.warning("Document does not contain key-values.")
590591
elif include_kv:
591592
for kv in self.key_values:
592593
keys.append(" ".join([w.text for w in kv.key]))
593594
values.append(kv.value.get_text())
594595

595596
if include_checkboxes and not self.checkboxes:
596-
logging.warning("Document does not contain checkbox elements.")
597+
logger.warning("Document does not contain checkbox elements.")
597598
elif include_checkboxes:
598599
for kv in self.checkboxes:
599600
keys.append(" ".join([w.text for w in kv.key]))
@@ -604,7 +605,7 @@ def export_kv_to_csv(
604605
for k, v in zip(keys, values):
605606
f.write(f"{k}{sep}{v}{os.linesep}")
606607

607-
logging.info(
608+
logger.info(
608609
f"csv file stored at location {os.path.join(os.getcwd(),filepath)}"
609610
)
610611

@@ -627,7 +628,7 @@ def export_kv_to_txt(
627628
export_str = ""
628629
index = 1
629630
if include_kv and not self.key_values:
630-
logging.warning("Document does not contain key-values.")
631+
logger.warning("Document does not contain key-values.")
631632
elif include_kv:
632633
for kv in self.key_values:
633634
export_str += (
@@ -636,15 +637,15 @@ def export_kv_to_txt(
636637
index += 1
637638

638639
if include_checkboxes and not self.checkboxes:
639-
logging.warning("Document does not contain checkbox elements.")
640+
logger.warning("Document does not contain checkbox elements.")
640641
elif include_checkboxes:
641642
for kv in self.checkboxes:
642643
export_str += f"{index}. {kv.key.__repr__()} : {kv.value.children[0].status.name}{os.linesep}"
643644
index += 1
644645

645646
with open(filepath, "w") as text_file:
646647
text_file.write(export_str)
647-
logging.info(
648+
logger.info(
648649
f"txt file stored at location {os.path.join(os.getcwd(),filepath)}"
649650
)
650651

@@ -657,7 +658,7 @@ def export_tables_to_excel(self, filepath):
657658
:type filepath: str, required
658659
"""
659660
if not filepath:
660-
logging.error("Filepath required to store excel file.")
661+
logger.error("Filepath required to store excel file.")
661662
workbook = xlsxwriter.Workbook(filepath)
662663
for table in self.tables:
663664
workbook = table.to_excel(
@@ -671,7 +672,7 @@ def independent_words(self):
671672
:rtype: EntityList[Word]
672673
"""
673674
if not self.words:
674-
logging.warning("Words have not been assigned to this Document object.")
675+
logger.warning("Words have not been assigned to this Document object.")
675676
return []
676677

677678
else:
@@ -824,7 +825,7 @@ def _get_coords(self, word_1, word_2, direction, page):
824825
)
825826

826827
if not word_1_objects:
827-
logging.warning(f"{word_1} not found in page {page}")
828+
logger.warning(f"{word_1} not found in page {page}")
828829
return -1, -1, -1, -1
829830
else:
830831
word_1_obj = word_1_objects[0]
@@ -839,7 +840,7 @@ def _get_coords(self, word_1, word_2, direction, page):
839840
)
840841
word_2_objects = [word for word in word_2_objects if word.page == page]
841842
if not word_2_objects:
842-
logging.warning(f"{word_2} not found in page {page}")
843+
logger.warning(f"{word_2} not found in page {page}")
843844
return -1, -1, -1, -1
844845
else:
845846
word_2_obj = word_2_objects[0]

textractor/entities/expense_field.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from textractor.data.constants import AnalyzeExpenseLineItemFields as AELineItems
99
from typing import List, Tuple
1010

11+
logger = logging.getLogger(__name__)
1112

1213
@dataclasses.dataclass
1314
class ExpenseType:
@@ -257,7 +258,7 @@ def to_pandas(self, include_EXPENSE_ROW=False):
257258
try:
258259
from pandas import DataFrame
259260
except ImportError:
260-
logging.info(
261+
logger.info(
261262
"pandas library is required for exporting tables to DataFrame objects"
262263
)
263264
return None

textractor/entities/identity_document.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
"""The IdentityDocument class is the object representation of an AnalyzeID response. It is similar to a dictionary. Despite its name, it does not inherit from Document as the AnalyzeID response does not contain position information."""
22

33
import os
4-
import string
5-
import logging
6-
import xlsxwriter
74
from typing import List, Dict, Union
8-
from copy import deepcopy
9-
from collections import defaultdict
105
from textractor.data.constants import AnalyzeIDFields
116
from textractor.entities.bbox import SpatialObject
127
from textractor.entities.identity_field import IdentityField

textractor/entities/key_value.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from textractor.visualizers.entitylist import EntityList
2121
from textractor.utils.html_utils import add_id_to_html_tag
2222

23+
logger = logging.getLogger(__name__)
2324

2425
class KeyValue(DocumentEntity):
2526
"""
@@ -101,7 +102,7 @@ def key(self):
101102
:rtype: EntityList[Word]
102103
"""
103104
if not self._words:
104-
logging.info("Key contains no words objects.")
105+
logger.info("Key contains no words objects.")
105106
return []
106107
return self._words
107108

@@ -123,7 +124,7 @@ def value(self) -> Value:
123124
:rtype: Value
124125
"""
125126
if self._value is None:
126-
logging.warning(
127+
logger.warning(
127128
"Asked for a value but it was never attributed "
128129
"-> make sure to assign value to key with the `kv.value = <Value Object>` property setter"
129130
)
@@ -193,7 +194,7 @@ def get_words_by_type(self, text_type: str = TextTypes.PRINTED) -> List[Word]:
193194
)
194195

195196
if not self.words:
196-
logging.info("Document contains no word entities.")
197+
logger.info("Document contains no word entities.")
197198
return []
198199
else:
199200
return EntityList(
@@ -211,12 +212,12 @@ def is_selected(self) -> bool:
211212
if len(self.value.children) == 1:
212213
return self.value.children[0].is_selected()
213214
else:
214-
logging.info(
215+
logger.info(
215216
"is_checked() was called on a KeyValue that contains more than one checkbox. Returning first checkbox"
216217
)
217218
return self.value.children[0].is_selected()
218219
else:
219-
logging.info(
220+
logger.info(
220221
"is_checked() was called on a KeyValue that does not contain checkboxes. Returning False"
221222
)
222223
return False

textractor/entities/page.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from textractor.visualizers.entitylist import EntityList
4545
from textractor.entities.linearizable import Linearizable
4646

47+
logger = logging.getLogger(__name__)
4748

4849
class Page(SpatialObject, Linearizable):
4950
"""
@@ -435,7 +436,7 @@ def filter_checkboxes(
435436
:rtype: EntityList[KeyValue]
436437
"""
437438
if not self.checkboxes:
438-
logging.warning(f"This document does not contain checkboxes")
439+
logger.warning(f"This document does not contain checkboxes")
439440
return []
440441
else:
441442
if selected and not_selected:
@@ -476,7 +477,7 @@ def get_words_by_type(
476477
)
477478

478479
if not self.words:
479-
logging.warn("Document contains no word entities.")
480+
logger.warning("Document contains no word entities.")
480481
return []
481482

482483
filtered_words = [word for word in self.words if word.text_type == text_type]
@@ -750,11 +751,11 @@ def get(
750751
lowest_similarity = top_n[-1][1]
751752

752753
if not top_n:
753-
logging.warning(
754+
logger.warning(
754755
f"Query key does not match any existing keys in the document.{os.linesep}{self.keys()}"
755756
)
756757

757-
logging.info(f"Query key matched {len(top_n)} key-values in the document.")
758+
logger.info(f"Query key matched {len(top_n)} key-values in the document.")
758759

759760
return EntityList([value[0] for value in top_n])
760761

@@ -778,14 +779,14 @@ def export_kv_to_csv(
778779
keys = []
779780
values = []
780781
if include_kv and not self.key_values:
781-
logging.warning("Document does not contain key-values.")
782+
logger.warning("Document does not contain key-values.")
782783
elif include_kv:
783784
for kv in self.key_values:
784785
keys.append(kv.key.__repr__())
785786
values.append(kv.value.__repr__())
786787

787788
if include_checkboxes and not self.checkboxes:
788-
logging.warning("Document does not contain checkbox elements.")
789+
logger.warning("Document does not contain checkbox elements.")
789790
elif include_checkboxes:
790791
for kv in self.checkboxes:
791792
keys.append(kv.key.__repr__())
@@ -796,7 +797,7 @@ def export_kv_to_csv(
796797
for k, v in zip(keys, values):
797798
f.write(f"{k},{v}{os.linesep}")
798799

799-
logging.info(
800+
logger.info(
800801
f"csv file stored at location {os.path.join(os.getcwd(), filepath)}"
801802
)
802803

@@ -819,7 +820,7 @@ def export_kv_to_txt(
819820
export_str = []
820821
index = 1
821822
if include_kv and not self.key_values:
822-
logging.warning("Document does not contain key-values.")
823+
logger.warning("Document does not contain key-values.")
823824
elif include_kv:
824825
for kv in self.key_values:
825826
export_str.append(
@@ -828,7 +829,7 @@ def export_kv_to_txt(
828829
index += 1
829830

830831
if include_checkboxes and not self.checkboxes:
831-
logging.warning("Document does not contain checkbox elements.")
832+
logger.warning("Document does not contain checkbox elements.")
832833
elif include_checkboxes:
833834
for kv in self.checkboxes:
834835
export_str.append(
@@ -838,7 +839,7 @@ def export_kv_to_txt(
838839

839840
with open(filepath, "w") as text_file:
840841
text_file.write("".join(export_str))
841-
logging.info(
842+
logger.info(
842843
f"txt file stored at location {os.path.join(os.getcwd(),filepath)}"
843844
)
844845

@@ -848,7 +849,7 @@ def independent_words(self) -> EntityList[Word]:
848849
:rtype: EntityList[Word]
849850
"""
850851
if not self.words:
851-
logging.warning("Words have not been assigned to this Document object.")
852+
logger.warning("Words have not been assigned to this Document object.")
852853
return []
853854

854855
else:
@@ -871,7 +872,7 @@ def export_tables_to_excel(self, filepath):
871872
:type filepath: str, required
872873
"""
873874
if not filepath:
874-
logging.error("Filepath required to store excel file.")
875+
logger.error("Filepath required to store excel file.")
875876
workbook = xlsxwriter.Workbook(filepath)
876877
for table in self.tables:
877878
workbook = table.to_excel(
@@ -1041,7 +1042,7 @@ def _get_coords(self, word_1, word_2, direction):
10411042
)
10421043

10431044
if not word_1_objects:
1044-
logging.warning(f"{word_1} not found in page")
1045+
logger.warning(f"{word_1} not found in page")
10451046
return -1, -1, -1, -1
10461047
else:
10471048
word_1_obj = word_1_objects[0]
@@ -1056,7 +1057,7 @@ def _get_coords(self, word_1, word_2, direction):
10561057
)
10571058

10581059
if not word_2_objects:
1059-
logging.warning(f"{word_2} not found in page")
1060+
logger.warning(f"{word_2} not found in page")
10601061
return -1, -1, -1, -1
10611062
else:
10621063
word_2_obj = word_2_objects[0]

textractor/entities/signature.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
bounding box information, page number, Page ID and confidence of detection.
77
"""
88

9-
import logging
10-
from typing import List
119
import uuid
1210
from textractor.data.text_linearization_config import TextLinearizationConfig
1311

0 commit comments

Comments
 (0)