@@ -107,3 +107,68 @@ def test_amend_domain_values_retains_original_if_prefix_matching(self):
107107 ]
108108 _data_extractor = DataExtractor (test_data )
109109 assert _data_extractor .amend_domain_values () == test_data
110+
def test_cleanse_data(self):
    """Check that ``cleanse_data`` returns the expected cleansed records.

    The input and expected records are identical except for the ``secure``
    flag of the Google and Facebook entries. In the expected output that
    flag is True for the ``https://`` URLs and False for the ``http://``
    ones, so the fixtures suggest ``cleanse_data`` is meant to make
    ``secure`` agree with the URL scheme (inferred from the data below;
    confirm against ``DataExtractor.cleanse_data``).
    """
    field_names = ('name', 'url', 'domain', 'secure', 'value')

    # One row per record:
    # (name, url, domain, secure as supplied, secure as expected, value)
    rows = (
        (
            'Google',
            'https://www.google.co.uk',
            'google.co.uk',
            False,
            True,
            5,
        ),
        (
            'Facebook',
            'http://developers.facebook.com/blog/post/2018/10/02/facebook-login-update/',
            'facebook.com',
            True,
            False,
            4,
        ),
        (
            'Bing',
            'http://www.bing.com/search?q=athlete&qs=n&form=QBLH&sp=-1&pq=athlete&sc=8-7&sk=&cvid=53830DD7FB2E47B7A5D9CF27F106BC9A',
            'bing.com',
            False,
            False,
            3,
        ),
        (
            'Duck Duck Go',
            'https://duckduckgo.com/?q=plane&t=h_&ia=web',
            'duckduckgo.com',
            True,
            True,
            2,
        ),
    )

    # Build the two lists independently so that the expected records stay
    # untouched even if the extractor modifies its input dicts in place.
    raw_records = [
        dict(zip(field_names, (name, url, domain, supplied, value)))
        for name, url, domain, supplied, _, value in rows
    ]
    cleansed_records = [
        dict(zip(field_names, (name, url, domain, wanted, value)))
        for name, url, domain, _, wanted, value in rows
    ]

    extractor = DataExtractor(raw_records)
    assert extractor.cleanse_data() == cleansed_records
0 commit comments