1- from websites .resources .data import WEBSITES
2-
3-
41class DataExtractor :
52 """
6- Use to extract, cleanse and amend incorrect website data collection.
3+ Use to extract, cleanse, sum and amend incorrect website data collection.
74 """
85 def __init__ (self , data ):
96 self .data = data
107
def find_items(self, value=4):
    """
    Find and return a new list of items where key "value" is greater than or equal to parameter value.
    Items whose "value" is missing, None or otherwise empty are skipped; a stored 0 is a real value
    and is compared against the threshold like any other number.
    :param value: int, value to find items for, default = 4.
    :return: list(dict), list of dictionaries matching the above filtering rule.
    """
    matches = []
    for item in self.data:
        current = item.get('value')
        # Skip absent/empty values but keep 0: a bare truthiness test would drop 0
        # even when the threshold is 0 or negative.
        if not current and current != 0:
            continue
        if current >= value:
            matches.append(item)
    return matches
1715
1816 def amend_domain_values (self , prefix = 'www.' ):
1917 """
2018 Fixes missing parts of the domain names.
21- :param prefix: str, prefix to add to the domain name. Default = 'www'.
19+ :param prefix: str, prefix to add to the domain name, default = 'www'.
2220 :return: amended: list(dict), amended list of web records.
2321 """
2422 amended = []
@@ -38,19 +36,19 @@ def cleanse_data(self):
3836 for item in self .data :
3937 url = item .get ('url' )
4038 secure = item .get ('secure' )
41- # https marked as secure = False
42- if url and url .startswith ('https:' ) and not item .get ('secure' ):
43- item ['secure' ] = True
44- # http marked as secure = True
45- elif url and url .startswith ('http:' ) and item .get ('secure' ):
46- item ['secure' ] = False
39+ if url :
40+ # https marked as secure = False
41+ if url .startswith ('https:' ) and not secure :
42+ item ['secure' ] = True
43+ # http marked as secure = True
44+ elif url .startswith ('http:' ) and secure :
45+ item ['secure' ] = False
4746 amended .append (item )
4847 return amended
4948
50-
51-
52- # data_extractor = DataExtractor(WEBSITES)
53- # print(data_extractor.amend_domain_values())
54- # print(data_extractor.find_items(4))
55- # print(len(data_extractor.find_items(4)))
56- # print(data_extractor.cleanse_data())
def get_value_sum(self):
    """
    Returns sum of all value keys in the data set.
    Items without a "value" key, or whose "value" is None or empty, contribute 0.
    :return: int, sum of all value keys in the data set.
    """
    # `or 0` also covers a key that is present but set to None; .get('value', 0)
    # would hand that None straight to sum() and raise TypeError.
    return sum(item.get('value') or 0 for item in self.data)
0 commit comments