1- from websites .resources .data import WEBSITES
2-
3-
41class DataExtractor :
52 """
6- Use to extract, cleanse and amend incorrect website data collection.
3+ Use to extract, cleanse, sum and amend incorrect website data collection.
74 """
85 def __init__ (self , data ):
96 self .data = data
107
def find_items(self, value=4):
    """
    Find and return a new list of items where key "value" is greater than or equal to parameter value.
    Items whose "value" is missing or None are skipped; a stored 0 is compared like any other number.
    :param value: int, value to find items for, default = 4.
    :return: list(dict), list of dictionaries matching the above filtering rule.
    """
    matches = []
    for item in self.data:
        item_value = item.get('value')
        # Test for None explicitly: a truthiness check would also discard a legitimate 0.
        if item_value is not None and item_value >= value:
            matches.append(item)
    return matches
1715
1816 def amend_domain_values (self , prefix = 'www.' ):
1917 """
2018 Fixes missing parts of the domain names.
21- :param prefix: str, prefix to add to the domain name. Default = 'www'.
19+ :param prefix: str, prefix to add to the domain name, default = 'www'.
2220 :return: amended: list(dict), amended list of web records.
2321 """
2422 amended = []
@@ -39,18 +37,17 @@ def cleanse_data(self):
3937 url = item .get ('url' )
4038 secure = item .get ('secure' )
4139 # https marked as secure = False
42- if url and url .startswith ('https:' ) and not item . get ( ' secure' ) :
40+ if url and url .startswith ('https:' ) and not secure :
4341 item ['secure' ] = True
4442 # http marked as secure = True
45- elif url and url .startswith ('http:' ) and item . get ( ' secure' ) :
43+ elif url and url .startswith ('http:' ) and secure :
4644 item ['secure' ] = False
4745 amended .append (item )
4846 return amended
4947
50-
51-
52- # data_extractor = DataExtractor(WEBSITES)
53- # print(data_extractor.amend_domain_values())
54- # print(data_extractor.find_items(4))
55- # print(len(data_extractor.find_items(4)))
56- # print(data_extractor.cleanse_data())
def get_value_sum(self):
    """
    Returns sum of all value keys in the data set.
    Records whose "value" is missing or None contribute 0 to the total.
    :return: int, sum of all value keys in the data set.
    """
    # `or 0` also covers an explicit None, which the .get() default alone would let through to sum().
    return sum(item.get('value') or 0 for item in self.data)
0 commit comments